diff --git a/.gitignore b/.gitignore index d661735ae..7a7282ac9 100644 --- a/.gitignore +++ b/.gitignore @@ -22,12 +22,12 @@ parquettest.sh .idea/ workfiles/ chr20/ -mango-cli/src/main/webapp/resources/node_modules/pileup/dist/lib/ -mango-cli/src/main/webapp/resources/node_modules/pileup/dist/main/ -mango-cli/src/main/webapp/resources/node_modules/pileup/dist/test/ -mango-cli/src/main/webapp/resources/node_modules/pileup/dist/test.js -mango-cli/src/main/webapp/resources/node_modules/pileup/dist/pileup.js -mango-cli/src/main/webapp/resources/node_modules/pileup/dist/pileup.js.map -mango-cli/src/main/webapp/resources/node_modules/pileup/dist/node_modules/ -mango-cli/src/main/webapp/resources/node_modules/pileup/node_modules/ -chr20/ +mango-play-* +mango-play/public/resources/node_modules/pileup/dist/lib/ +mango-play/public/resources/node_modules/pileup/dist/main/ +mango-play/public/resources/node_modules/pileup/dist/test/ +mango-play/public/resources/node_modules/pileup/dist/test.js +mango-play/public/resources/node_modules/pileup/dist/pileup.js +mango-play/public/resources/node_modules/pileup/dist/pileup.js.map +mango-play/public/resources/node_modules/pileup/dist/node_modules/ +mango-play/public/resources/node_modules/pileup/node_modules/ diff --git a/.gitmodules b/.gitmodules index be9ae56a0..f79f524fb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "mango-cli/src/main/webapp/resources/pileup.js"] - path = mango-cli/src/main/webapp/resources/pileup.js +[submodule "mango-play/public/resources/pileup.js"] + path = mango-play/public/resources/pileup.js url = https://github.com/akmorrow13/pileup.js.git diff --git a/README.md b/README.md index 4085d215e..c1064d138 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -#mango +# mango A scalable genome browser built on top of the [ADAM](https://github.com/bigdatagenomics/adam) genomics processing engine. Apache 2 licensed. @@ -30,7 +30,9 @@ mango is packaged via [appassembler](http://mojo.codehaus.org/appassembler/appas Running an example script: ``` -From the main folder of mango, run ./example-files/run-example.sh to see a demonstration of chromosome 17, region 7500000-7515000. +From the main folder of mango, run ./scripts/run-example.sh to see a demonstration of chromosome 17, region 7500000-7515000. +``` +**Note:** In local mode, relative paths are referenced from the mango-play submodule. To reference your own files, put in the absolute path. ``` For help launching the script, run `bin/mango-submit -h` ```` diff --git a/bin/compute-mango-classpath.sh b/bin/compute-mango-classpath.sh index bd0317723..406f5a18e 100755 --- a/bin/compute-mango-classpath.sh +++ b/bin/compute-mango-classpath.sh @@ -28,8 +28,8 @@ REPO="$SCRIPT_DIR/repo" # Fallback to source repo if [ ! -f $MANGO_CMD ]; then -MANGO_CMD="$SCRIPT_DIR/mango-cli/target/appassembler/bin/mango" -REPO="$SCRIPT_DIR/mango-cli/target/appassembler/repo" +MANGO_CMD="$SCRIPT_DIR/mango-play/target/appassembler/bin/mango" +REPO="$SCRIPT_DIR/mango-play/target/appassembler/repo" fi if [ ! -f "$MANGO_CMD" ]; then diff --git a/bin/compute-spark-jars.sh b/bin/compute-spark-jars.sh new file mode 100755 index 000000000..32344313e --- /dev/null +++ b/bin/compute-spark-jars.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# +# Licensed to Big Data Genomics (BDG) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The BDG licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e +if [ -z "${SPARK_HOME}" ]; then + export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" +fi + +. "${SPARK_HOME}"/bin/load-spark-env.sh + +# Find the java binary +if [ -n "${JAVA_HOME}" ]; then + RUNNER="${JAVA_HOME}/bin/java" +else + if [ `command -v java` ]; then + RUNNER="java" + else + echo "JAVA_HOME is not set" >&2 + exit 1 + fi +fi + +# Find assembly jar +SPARK_ASSEMBLY_JAR= +if [ -f "${SPARK_HOME}/RELEASE" ]; then + ASSEMBLY_DIR="${SPARK_HOME}/lib" +else + ASSEMBLY_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION" +fi + +GREP_OPTIONS= +num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | wc -l)" +if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" -a "$SPARK_PREPEND_CLASSES" != "1" ]; then + echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2 + echo "You need to build Spark before running this program." 1>&2 + exit 1 +fi +if [ -d "$ASSEMBLY_DIR" ]; then + ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" || true)" + if [ "$num_jars" -gt "1" ]; then + echo "Found multiple Spark assembly jars in $ASSEMBLY_DIR:" 1>&2 + echo "$ASSEMBLY_JARS" 1>&2 + echo "Please remove all but one jar." 1>&2 + exit 1 + fi +fi + +echo "${ASSEMBLY_DIR}/${ASSEMBLY_JARS}" diff --git a/bin/mango-install.sh b/bin/mango-install.sh new file mode 100755 index 000000000..f4a7c9ae5 --- /dev/null +++ b/bin/mango-install.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# +# Licensed to Big Data Genomics (BDG) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The BDG licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -e + +SCRIPT_DIR="$(cd `dirname $0`/..; pwd)" + +# build mango-play distribution +mvn -f $SCRIPT_DIR play2:dist -pl mango-play + +# unzip new dist directory +unzip -o "$SCRIPT_DIR/mango-play/target/mango-play-0.0.1-SNAPSHOT-dist.zip" -d $SCRIPT_DIR diff --git a/bin/mango-submit b/bin/mango-submit index e4a4d8c80..92fcfe069 100755 --- a/bin/mango-submit +++ b/bin/mango-submit @@ -74,13 +74,29 @@ if [ -z "$SPARK_SUBMIT" ]; then fi echo "Using SPARK_SUBMIT=$SPARK_SUBMIT" -# submit the job to Spark -"$SPARK_SUBMIT" \ - --class org.bdgenomics.mango.cli.VizReads \ - --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ - --conf spark.kryo.registrator=org.bdgenomics.mango.serialization.MangoKryoRegistrator \ - --conf spark.dynamicAllocation.executorIdleTimeout=10d \ - --conf spark.driver.memory=5g \ - $SPARK_ARGS \ - $MANGO_CLI_JAR \ - $MANGO_ARGS +# exports +export SPARK_ARGS=$SPARK_ARGS +export MANGO_MAIN_JAR=$MANGO_CLI_JAR +export MANGO_ARGS=$MANGO_ARGS + +# generate application key +APP_KEY=$(openssl rand -base64 15) +export APPLICATION_SECRET=$APP_KEY + +SCRIPT_DIR="$(cd `dirname $0`/..; pwd)" + +# get version of mango from maven. This will be used to fetch the +# most recently installed mango-play distribution +MANGO_VERSION=`echo -e 'setns x=http://maven.apache.org/POM/4.0.0\ncat /x:project/x:version/text()' | xmllint --shell $SCRIPT_DIR/pom.xml | grep -v /` + +classpath="$SCRIPT_DIR/mango-play-$MANGO_VERSION/lib/*" + +# Get list of provided jars for spark +SPARK_JARS=$("$SCRIPT_DIR"/bin/compute-spark-jars.sh) + +# modify classpath to include spark and hadoop +# spark must come second do to dependency conflicts +classpath="$classpath:$SPARK_JARS" + +# this is copied from playframework's autogenerated start script +exec java $* -cp "$classpath" play.core.server.NettyServer $scriptdir diff --git a/mango-cli/pom.xml b/mango-cli/pom.xml deleted file mode 100644 index 043f806d7..000000000 --- a/mango-cli/pom.xml +++ /dev/null @@ -1,217 +0,0 @@ - - - 4.0.0 - - - org.bdgenomics.mango - mango-parent - 0.0.1-SNAPSHOT - ../pom.xml - - mango-cli - jar - mango-cli: an ADAM-based genomics visualization toolkit - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-source - generate-sources - - add-source - - - - src/main/scala - - - - - add-test-source - generate-test-sources - - add-test-source - - - - src/test/scala - - - - - - - org.codehaus.mojo - appassembler-maven-plugin - - - - org.bdgenomics.mango.cli.VizReads - mango - - - - - - package - - assemble - - - - - - org.scalatest - scalatest-maven-plugin - - - - - - - net.fnothaft - ga4gh-format - - - com.esotericsoftware.kryo - kryo - compile - - - com.github.samtools - htsjdk - compile - - - it.unimi.dsi - fastutil - compile - - - net.liftweb - lift-json_${scala.version.prefix} - compile - - - org.apache.avro - avro - compile - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.parquet - parquet-avro - compile - - - org.apache.parquet - parquet-scala_2.10 - compile - - - org.apache.spark - spark-core_${scala.version.prefix} - provided - - - org.bdgenomics.adam - adam-cli${spark.version.prefix}${scala.version.prefix} - compile - - - org.bdgenomics.adam - adam-core${spark.version.prefix}${scala.version.prefix} - compile - - - org.bdgenomics.adam - adam-core${spark.version.prefix}${scala.version.prefix} - test-jar - test - - - org.bdgenomics.bdg-formats - bdg-formats - compile - - - org.bdgenomics.utils - utils-cli_${scala.version.prefix} - compile - - - org.bdgenomics.utils - 
utils-intervalrdd_${scala.version.prefix} - compile - - - org.bdgenomics.utils - utils-io_${scala.version.prefix} - compile - - - org.bdgenomics.utils - utils-metrics_${scala.version.prefix} - compile - - - org.bdgenomics.utils - utils-misc_${scala.version.prefix} - test-jar - test - - - org.eclipse.jetty - jetty-server - compile - - - org.eclipse.jetty - jetty-webapp - compile - - - org.scalatest - scalatest_${scala.version.prefix} - test - - - org.scalatra - scalatra_${scala.version.prefix} - compile - - - org.scalatra - scalatra-scalatest_${scala.version.prefix} - test - - - org.scalatra - scalatra-specs2_${scala.version.prefix} - test - - - org.scalatra.scalate - scalate-core_${scala.version.prefix} - compile - - - org.bdgenomics.mango - mango-core - ${project.version} - - - org.seqdoop - hadoop-bam - compile - - - diff --git a/mango-cli/src/main/scala/org/bdgenomics/mango/cli/VizReads.scala b/mango-cli/src/main/scala/org/bdgenomics/mango/cli/VizReads.scala deleted file mode 100644 index 33f06b150..000000000 --- a/mango-cli/src/main/scala/org/bdgenomics/mango/cli/VizReads.scala +++ /dev/null @@ -1,713 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.bdgenomics.mango.cli - -import java.io.FileNotFoundException -import net.liftweb.json.Serialization.write -import net.liftweb.json._ -import org.apache.spark.SparkContext -import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary } -import org.bdgenomics.mango.core.util.{ VizUtils, VizCacheIndicator } -import org.bdgenomics.mango.filters._ -import org.bdgenomics.mango.layout.{ VariantJson, GenotypeJson } -import org.bdgenomics.mango.models._ -import org.bdgenomics.utils.cli._ -import org.bdgenomics.utils.instrumentation.Metrics -import org.bdgenomics.utils.misc.Logging -import org.fusesource.scalate.TemplateEngine -import org.kohsuke.args4j.{ Argument, Option => Args4jOption } -import org.scalatra._ - -object VizTimers extends Metrics { - //HTTP requests - val ReadsRequest = timer("GET reads") - val CoverageRequest = timer("GET coverage") - val FreqRequest = timer("GET frequency") - val VarRequest = timer("GET variants") - val VarFreqRequest = timer("Get variant frequency") - val FeatRequest = timer("GET features") - val AlignmentRequest = timer("GET alignment") - - //RDD operations - val FreqRDDTimer = timer("RDD Freq operations") - val VarRDDTimer = timer("RDD Var operations") - val FeatRDDTimer = timer("RDD Feat operations") - val RefRDDTimer = timer("RDD Ref operations") - val GetPartChunkTimer = timer("Calculate block chunk") - - //Generating Json - val MakingTrack = timer("Making Track") - val DoingCollect = timer("Doing Collect") - val PrintReferenceTimer = timer("JSON get reference string") -} - -object VizReads extends BDGCommandCompanion with Logging { - - val commandName: String = "viz" - val commandDescription: String = "Genomic visualization for ADAM" - implicit val formats = net.liftweb.json.DefaultFormats - - var sc: SparkContext = null - var server: org.eclipse.jetty.server.Server = null - var globalDict: SequenceDictionary = null - - // Gene URL - var genes: Option[String] = None - - // Structures storing data types. 
All but reference is optional - var annotationRDD: AnnotationMaterialization = null - var readsData: Option[AlignmentRecordMaterialization] = None - - var coverageData: Option[CoverageMaterialization] = None - - var variantContextData: Option[VariantContextMaterialization] = None - - var featureData: Option[FeatureMaterialization] = None - - // variables tracking whether optional datatypes were loaded - def readsExist: Boolean = readsData.isDefined - def coveragesExist: Boolean = coverageData.isDefined - def variantsExist: Boolean = variantContextData.isDefined - def featuresExist: Boolean = featureData.isDefined - - // placeholder for indicators - val region = ReferenceRegion("N", 0, 1) - - // reads cache - object readsWait - var readsCache: Map[String, String] = Map.empty[String, String] - var readsIndicator = VizCacheIndicator(region, 1) - - // coverage reads cache - object readsCoverageWait - var readsCoverageCache: Map[String, String] = Map.empty[String, String] - var readsCoverageIndicator = VizCacheIndicator(region, 1) - - // coverage cache - object coverageWait - var coverageCache: Map[String, String] = Map.empty[String, String] - var coverageIndicator = VizCacheIndicator(region, 1) - - // variant cache - object variantsWait - var variantsCache: Map[String, String] = Map.empty[String, String] - var variantsIndicator = VizCacheIndicator(region, 1) - var showGenotypes: Boolean = false - - // features cache - object featuresWait - var featuresCache: Map[String, String] = Map.empty[String, String] - var featuresIndicator = VizCacheIndicator(region, 1) - - // regions to prefetch during discovery. sent to front - // end for visual processing - var prefetchedRegions: List[(ReferenceRegion, Double)] = List() - - // used to determine size of data tiles - var chunkSize: Int = 1000 - - // thresholds used for visualization binning and limits - var screenSize: Int = 1000 - - // HTTP ERROR RESPONSES - object errors { - var outOfBounds = NotFound("Region not found in Reference Sequence Dictionary") - var largeRegion = RequestEntityTooLarge("Region too large") - var unprocessableFile = UnprocessableEntity("File type not supported") - var notFound = NotFound("File not found") - def noContent(region: ReferenceRegion): ActionResult = { - val msg = s"No content available at ${region.toString}" - NoContent(Map.empty, msg) - } - } - - def apply(cmdLine: Array[String]): BDGCommand = { - new VizReads(Args4j[VizReadsArgs](cmdLine)) - } - - /** - * Returns stringified version of sequence dictionary - * - * @param dict: dictionary to format to a string - * @return List of sequence dictionary strings of form referenceName:0-referenceName.length - */ - def formatDictionaryOpts(dict: SequenceDictionary): String = { - val sorted = dict.records.sortBy(_.length).reverse - sorted.map(r => r.name + ":0-" + r.length).mkString(",") - } - - /** - * Returns stringified version of sequence dictionary - * - * @param regions: regions to format to string - * @return list of strinified reference regions - */ - def formatClickableRegions(regions: List[(ReferenceRegion, Double)]): String = { - regions.map(r => s"${r._1.referenceName}:${r._1.start}-${r._1.end}" + - s"-${BigDecimal(r._2).setScale(2, BigDecimal.RoundingMode.HALF_UP).toDouble}").mkString(",") - } - - //Correctly shuts down the server - def quit() { - val thread = new Thread { - override def run() { - try { - log.info("Shutting down the server") - server.stop() - log.info("Server has stopped") - } catch { - case e: Exception => { - log.info("Error when stopping 
Jetty server: " + e.getMessage, e) - } - } - } - } - thread.start() - } - -} - -case class ReferenceJson(reference: String, position: Long) - -class VizReadsArgs extends Args4jBase with ParquetArgs { - @Argument(required = true, metaVar = "reference", usage = "The reference file to view, required", index = 0) - var referencePath: String = null - - @Args4jOption(required = false, name = "-genes", usage = "Gene URL.") - var genePath: String = null - - @Args4jOption(required = false, name = "-reads", usage = "A list of reads files to view, separated by commas (,)") - var readsPaths: String = null - - @Args4jOption(required = false, name = "-coverage", usage = "A list of coverage files to view, separated by commas (,)") - var coveragePaths: String = null - - @Args4jOption(required = false, name = "-variants", usage = "A list of variants files to view, separated by commas (,). " + - "Vcf files require a corresponding tbi index.") - var variantsPaths: String = null - - @Args4jOption(required = false, name = "-show_genotypes", usage = "Shows genotypes if available in variant files.") - var showGenotypes: Boolean = false - - @Args4jOption(required = false, name = "-features", usage = "The feature files to view, separated by commas (,)") - var featurePaths: String = null - - @Args4jOption(required = false, name = "-port", usage = "The port to bind to for visualization. The default is 8080.") - var port: Int = 8080 - - @Args4jOption(required = false, name = "-test", usage = "For debugging purposes.") - var testMode: Boolean = false - - @Args4jOption(required = false, name = "-discover", usage = "This turns on discovery mode on start up.") - var discoveryMode: Boolean = false -} - -class VizServlet extends ScalatraServlet { - implicit val formats = net.liftweb.json.DefaultFormats - - get("/?") { - redirect("/overall") - } - - get("/quit") { - VizReads.quit() - } - - get("/overall") { - contentType = "text/html" - val templateEngine = new TemplateEngine - // set initial referenceRegion so it is defined. pick first chromosome to view - val firstChr = VizReads.globalDict.records.head.name - session("referenceRegion") = ReferenceRegion(firstChr, 1, 100) - templateEngine.layout("mango-cli/src/main/webapp/WEB-INF/layouts/overall.ssp", - Map("dictionary" -> VizReads.formatDictionaryOpts(VizReads.globalDict), - "regions" -> VizReads.formatClickableRegions(VizReads.prefetchedRegions))) - } - - get("/setContig/:ref") { - val viewRegion = ReferenceRegion(params("ref"), params("start").toLong, params("end").toLong) - session("referenceRegion") = viewRegion - } - - get("/browser") { - contentType = "text/html" - // if session variable for reference region is not yet set, randomly set it - try { - session("referenceRegion") - } catch { - case e: Exception => - val firstChr = VizReads.globalDict.records.head.name - session("referenceRegion") = ReferenceRegion(firstChr, 0, 100) - } - - val templateEngine = new TemplateEngine - // set initial referenceRegion so it is defined - val region = session("referenceRegion").asInstanceOf[ReferenceRegion] - val indicator = VizCacheIndicator(region, 1) - VizReads.readsIndicator = indicator - VizReads.variantsIndicator = indicator - VizReads.featuresIndicator = indicator - - // generate file keys for front end - val readsSamples: Option[List[(String, Option[String])]] = try { - val reads = VizReads.readsData.get.getFiles.map(r => LazyMaterialization.filterKeyFromFile(r)) - - // check if there are precomputed coverage files for reads. 
If so, send this information to the frontend - // to avoid extra coverage computation - if (VizReads.coverageData.isDefined) { - Some(reads.map(r => { - val coverage = VizReads.coverageData.get.getFiles.map(c => LazyMaterialization.filterKeyFromFile(c)) - .find(c => { - c.contains(r) - }) - (r, coverage) - })) - } else Some(reads.map((_, None))) - - } catch { - case e: Exception => None - } - - val coverageSamples = try { - val coverage = VizReads.coverageData.get.getFiles.map(r => LazyMaterialization.filterKeyFromFile(r)) - - // filter out coverage samples that will be displayed with reads - if (readsSamples.isDefined) { - val readsCoverage = readsSamples.get.map(_._2).flatten - Some(coverage.filter(c => !readsCoverage.contains(c))) - } else Some(coverage) - } catch { - case e: Exception => None - } - - val variantSamples = try { - if (VizReads.showGenotypes) - Some(VizReads.variantContextData.get.getGenotypeSamples().map(r => (LazyMaterialization.filterKeyFromFile(r._1), r._2.mkString(",")))) - else Some(VizReads.variantContextData.get.getFiles.map(r => (LazyMaterialization.filterKeyFromFile(r), ""))) - } catch { - case e: Exception => None - } - - val featureSamples = try { - Some(VizReads.featureData.get.getFiles.map(r => LazyMaterialization.filterKeyFromFile(r))) - } catch { - case e: Exception => None - } - - templateEngine.layout("mango-cli/src/main/webapp/WEB-INF/layouts/browser.ssp", - Map("dictionary" -> VizReads.formatDictionaryOpts(VizReads.globalDict), - "genes" -> VizReads.genes, - "reads" -> readsSamples, - "coverage" -> coverageSamples, - "variants" -> variantSamples, - "features" -> featureSamples, - "contig" -> session("referenceRegion").asInstanceOf[ReferenceRegion].referenceName, - "start" -> session("referenceRegion").asInstanceOf[ReferenceRegion].start.toString, - "end" -> session("referenceRegion").asInstanceOf[ReferenceRegion].end.toString)) - } - - get("/reference/:ref") { - val viewRegion = ReferenceRegion(params("ref"), params("start").toLong, params("end").toLong) - session("referenceRegion") = viewRegion - val dictOpt = VizReads.globalDict(viewRegion.referenceName) - if (dictOpt.isDefined) { - Ok(write(VizReads.annotationRDD.getReferenceString(viewRegion))) - } else VizReads.errors.outOfBounds - } - - get("/sequenceDictionary") { - Ok(write(VizReads.annotationRDD.getSequenceDictionary.records)) - } - - get("/reads/:key/:ref") { - VizTimers.ReadsRequest.time { - - if (!VizReads.readsExist) { - VizReads.errors.notFound - } else { - val viewRegion = ReferenceRegion(params("ref"), params("start").toLong, - VizUtils.getEnd(params("end").toLong, VizReads.globalDict(params("ref")))) - val key: String = params("key") - contentType = "json" - - val dictOpt = VizReads.globalDict(viewRegion.referenceName) - if (dictOpt.isDefined) { - var results: Option[String] = None - VizReads.readsWait.synchronized { - // region was already collected, grab from cache - if (viewRegion != VizReads.readsIndicator.region) { - VizReads.readsCache = VizReads.readsData.get.getJson(viewRegion) - VizReads.readsIndicator = VizCacheIndicator(viewRegion, 1) - } - results = VizReads.readsCache.get(key) - } - if (results.isDefined) { - Ok(results.get) - } else VizReads.errors.noContent(viewRegion) - } else VizReads.errors.outOfBounds - } - } - } - - get("/coverage/:key/:ref") { - val viewRegion = ReferenceRegion(params("ref"), params("start").toLong, - VizUtils.getEnd(params("end").toLong, VizReads.globalDict(params("ref")))) - val key: String = params("key") - val binning: Int = - try - 
params("binning").toInt - catch { - case e: Exception => 1 - } - getCoverage(viewRegion, key, binning) - } - - get("/reads/coverage/:key/:ref") { - VizTimers.ReadsRequest.time { - - if (!VizReads.readsExist) { - VizReads.errors.notFound - } else { - val viewRegion = ReferenceRegion(params("ref"), params("start").toLong, - VizUtils.getEnd(params("end").toLong, VizReads.globalDict(params("ref")))) - val key: String = params("key") - contentType = "json" - - // get all coverage files that have been loaded - val coverageFiles = - if (VizReads.coverageData.isDefined) { - Some(VizReads.coverageData.get.getFiles.map(f => LazyMaterialization.filterKeyFromFile(f))) - } else None - - // check if there is a precomputed coverage file for this reads file - if (coverageFiles.isDefined && coverageFiles.get.contains(key)) { - // TODO: I dont know if this is correct for getting keys - val binning: Int = - try - params("binning").toInt - catch { - case e: Exception => 1 - } - - getCoverage(viewRegion, key, binning) - } else { - // no precomputed coverage - val dictOpt = VizReads.globalDict(viewRegion.referenceName) - if (dictOpt.isDefined) { - var results: Option[String] = None - VizReads.readsCoverageWait.synchronized { - // region was already collected, grab from cache - if (viewRegion != VizReads.readsCoverageIndicator.region) { - VizReads.readsCoverageCache = VizReads.readsData.get.getCoverage(viewRegion) - VizReads.readsIndicator = VizCacheIndicator(viewRegion, 1) - } - results = VizReads.readsCoverageCache.get(key) - } - if (results.isDefined) { - Ok(results.get) - } else VizReads.errors.noContent(viewRegion) - } else VizReads.errors.outOfBounds - } - } - } - } - - get("/variants/:key/:ref") { - VizTimers.VarRequest.time { - if (!VizReads.variantsExist) - VizReads.errors.notFound - else { - val viewRegion = ReferenceRegion(params("ref"), params("start").toLong, - VizUtils.getEnd(params("end").toLong, VizReads.globalDict(params("ref")))) - val key: String = params("key") - contentType = "json" - - // if region is in bounds of reference, return data - val dictOpt = VizReads.globalDict(viewRegion.referenceName) - if (dictOpt.isDefined) { - var results: Option[String] = None - val binning: Int = - try { - params("binning").toInt - } catch { - case e: Exception => 1 - } - VizReads.variantsWait.synchronized { - // region was already collected, grab from cache - if (VizCacheIndicator(viewRegion, binning) != VizReads.variantsIndicator) { - VizReads.variantsCache = VizReads.variantContextData.get.getJson(viewRegion, - VizReads.showGenotypes, - binning) - VizReads.variantsIndicator = VizCacheIndicator(viewRegion, binning) - } - results = VizReads.variantsCache.get(key) - } - if (results.isDefined) { - // extract variants only and parse to stringified json - Ok(results.get) - } else VizReads.errors.noContent(viewRegion) - } else VizReads.errors.outOfBounds - } - } - } - - get("/features/:key/:ref") { - VizTimers.FeatRequest.time { - if (!VizReads.featuresExist) - VizReads.errors.notFound - else { - val viewRegion = ReferenceRegion(params("ref"), params("start").toLong, - VizUtils.getEnd(params("end").toLong, VizReads.globalDict(params("ref")))) - val key: String = params("key") - contentType = "json" - - // if region is in bounds of reference, return data - val dictOpt = VizReads.globalDict(viewRegion.referenceName) - if (dictOpt.isDefined) { - var results: Option[String] = None - val binning: Int = - try { - params("binning").toInt - } catch { - case e: Exception => 1 - } - VizReads.featuresWait.synchronized { 
- // region was already collected, grab from cache - if (VizCacheIndicator(viewRegion, binning) != VizReads.featuresIndicator) { - VizReads.featuresCache = VizReads.featureData.get.getJson(viewRegion, binning) - VizReads.featuresIndicator = VizCacheIndicator(viewRegion, binning) - } - results = VizReads.featuresCache.get(key) - } - if (results.isDefined) { - Ok(results.get) - } else VizReads.errors.noContent(viewRegion) - } else VizReads.errors.outOfBounds - } - } - } - - /** - * Gets Coverage for a get Request. This is used to get both Reads based coverage and generic coverage. - * @param viewRegion ReferenceRegion to view coverage over - * @param key key for coverage file (see LazyMaterialization) - * @return ActionResult of coverage json - */ - def getCoverage(viewRegion: ReferenceRegion, key: String, binning: Int = 1): ActionResult = { - VizTimers.CoverageRequest.time { - if (!VizReads.coveragesExist) { - VizReads.errors.notFound - } else { - contentType = "json" - val dictOpt = VizReads.globalDict(viewRegion.referenceName) - if (dictOpt.isDefined) { - var results: Option[String] = None - VizReads.coverageWait.synchronized { - // region was already collected, grab from cache - if (viewRegion != VizReads.coverageIndicator.region) { - VizReads.coverageCache = VizReads.coverageData.get.getCoverage(viewRegion, binning) - VizReads.coverageIndicator = VizCacheIndicator(viewRegion, 1) - } - results = VizReads.coverageCache.get(key) - } - if (results.isDefined) { - Ok(results.get) - } else VizReads.errors.noContent(viewRegion) - } else VizReads.errors.outOfBounds - } - } - } -} - -class VizReads(protected val args: VizReadsArgs) extends BDGSparkCommand[VizReadsArgs] with Logging { - val companion: BDGCommandCompanion = VizReads - - override def run(sc: SparkContext): Unit = { - VizReads.sc = sc - - // choose prefetch size - val prefetch = - if (sc.isLocal) 10000 - else 100000 - - // initialize all datasets - initAnnotations - initAlignments - initCoverages - initVariantContext - initFeatures - - // run discovery mode if it is specified in the startup script - if (args.discoveryMode) { - VizReads.prefetchedRegions = discoverFrequencies() - preprocess(VizReads.prefetchedRegions) - } - - // check whether genePath was supplied - if (args.genePath != null) { - VizReads.genes = Some(args.genePath) - } - - // start server - if (!args.testMode) startServer() - - /* - * Initialize required reference file - */ - def initAnnotations() = { - val referencePath = Option(args.referencePath).getOrElse({ - throw new FileNotFoundException("reference file not provided") - }) - - VizReads.annotationRDD = new AnnotationMaterialization(sc, referencePath) - VizReads.globalDict = VizReads.annotationRDD.getSequenceDictionary - } - - /* - * Initialize loaded alignment files - */ - def initAlignments = { - if (Option(args.readsPaths).isDefined) { - val readsPaths = args.readsPaths.split(",").toList - - if (readsPaths.nonEmpty) { - VizReads.readsData = Some(new AlignmentRecordMaterialization(sc, readsPaths, VizReads.globalDict, Some(prefetch))) - } - } - } - - /* - * Initialize coverage files - */ - def initCoverages = { - if (Option(args.coveragePaths).isDefined) { - val coveragePaths = args.coveragePaths.split(",").toList - - if (coveragePaths.nonEmpty) { - VizReads.coverageData = Some(new CoverageMaterialization(sc, coveragePaths, VizReads.globalDict, Some(prefetch))) - } - } - } - - /** - * Initialize loaded variant files - */ - def initVariantContext() = { - // set flag for visualizing genotypes - 
VizReads.showGenotypes = args.showGenotypes - - if (Option(args.variantsPaths).isDefined) { - val variantsPaths = args.variantsPaths.split(",").toList - - if (variantsPaths.nonEmpty) { - VizReads.variantContextData = Some(new VariantContextMaterialization(sc, variantsPaths, VizReads.globalDict, Some(prefetch))) - } - } - } - - /** - * Initialize loaded feature files - */ - def initFeatures() = { - val featurePaths = Option(args.featurePaths) - if (featurePaths.isDefined) { - val featurePaths = args.featurePaths.split(",").toList - if (featurePaths.nonEmpty) { - VizReads.featureData = Some(new FeatureMaterialization(sc, featurePaths, VizReads.globalDict, Some(prefetch))) - } - } - } - - /** - * Runs total data scan over all feature, variant and coverage files, calculating the normalied frequency at all - * windows in the genome. - * - * @return Returns list of windowed regions in the genome and their corresponding normalized frequencies - */ - def discoverFrequencies(): List[(ReferenceRegion, Double)] = { - - val discovery = Discovery(VizReads.annotationRDD.getSequenceDictionary) - var regions: List[(ReferenceRegion, Double)] = List() - - // get feature frequency - if (VizReads.featuresExist) { - val featureRegions = VizReads.featureData.get.getAll().map(ReferenceRegion.unstranded(_)) - regions = regions ++ discovery.getFrequencies(featureRegions) - } - - // get variant frequency - if (VizReads.variantsExist) { - val variantRegions = VizReads.variantContextData.get.getAll().map(r => ReferenceRegion(r.variant)) - regions = regions ++ discovery.getFrequencies(variantRegions) - } - - // get coverage frequency - // Note: calculating coverage frequency is an expensive operation. Only perform if sc is not local. - if (VizReads.coveragesExist && !sc.isLocal) { - val coverageRegions = VizReads.coverageData.get.getAll().map(ReferenceRegion(_)) - regions = regions ++ discovery.getFrequencies(coverageRegions) - } - - // group all regions together and reduce down for all data types - regions = regions.groupBy(_._1).map(r => (r._1, r._2.map(a => a._2).sum)).toList - - // normalize and filter by regions with data - val max = regions.map(_._2).reduceOption(_ max _).getOrElse(1.0) - regions.map(r => (r._1, r._2 / max)) - .filter(_._2 > 0.0) - } - - /** - * preprocesses data by loading specified regions into memory for reads, coverage, variants and features - * - * @param regions Regions to be preprocessed - */ - def preprocess(regions: List[(ReferenceRegion, Double)]) = { - // select two of the highest occupied regions to load - // The number of selected regions is low to reduce unnecessary loading while - // jump starting Thread setup for Spark on the specific data files - val selectedRegions = regions.sortBy(_._2).takeRight(2).map(_._1) - - for (region <- selectedRegions) { - if (VizReads.featureData.isDefined) - VizReads.featureData.get.get(region) - if (VizReads.readsData.isDefined) - VizReads.readsData.get.get(region) - if (VizReads.coverageData.isDefined) - VizReads.coverageData.get.get(region) - if (VizReads.variantContextData.isDefined) - VizReads.variantContextData.get.get(region) - } - } - - /** - * Starts server once on startup - */ - def startServer() = { - VizReads.server = new org.eclipse.jetty.server.Server(args.port) - val handlers = new org.eclipse.jetty.server.handler.ContextHandlerCollection() - VizReads.server.setHandler(handlers) - handlers.addHandler(new org.eclipse.jetty.webapp.WebAppContext("mango-cli/src/main/webapp", "/")) - VizReads.server.start() - println("View the 
visualization at: " + args.port) - println("Quit at: /quit") - VizReads.server.join() - } - - } -} diff --git a/mango-cli/src/main/webapp/WEB-INF/layouts/browser.ssp b/mango-cli/src/main/webapp/WEB-INF/layouts/browser.ssp deleted file mode 100644 index a779f3420..000000000 --- a/mango-cli/src/main/webapp/WEB-INF/layouts/browser.ssp +++ /dev/null @@ -1,164 +0,0 @@ -<%@ val dictionary: String %> -<%@ val genes: Option[String] %> -<%@ val coverage: Option[List[String]] %> -<%@ val reads: Option[List[(String, Option[String])]] %> -<%@ val variants: Option[List[(String, String)]] %> -<%@ val features: Option[List[String]] %> -<%@ val contig: String %> -<%@ val start: String %> -<%@ val end: String %> - - - - -<% render("templates/head.ssp") %> -<% render("navigation/navigation.ssp", Map("isBrowser" -> true)) %> -<% render("templates/sidebar.ssp") %> - -
- - - - - diff --git a/mango-cli/src/main/webapp/WEB-INF/layouts/templates/head.ssp b/mango-cli/src/main/webapp/WEB-INF/layouts/templates/head.ssp deleted file mode 100644 index 9b416e621..000000000 --- a/mango-cli/src/main/webapp/WEB-INF/layouts/templates/head.ssp +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mango-cli/src/main/webapp/WEB-INF/web.xml b/mango-cli/src/main/webapp/WEB-INF/web.xml deleted file mode 100644 index 341859e76..000000000 --- a/mango-cli/src/main/webapp/WEB-INF/web.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - mango - org.bdgenomics.mango.cli.VizServlet - - - mango - /* - - diff --git a/mango-cli/src/main/webapp/resources/pileup.js b/mango-cli/src/main/webapp/resources/pileup.js deleted file mode 160000 index 4ab931d54..000000000 --- a/mango-cli/src/main/webapp/resources/pileup.js +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4ab931d548d876109bd64e84329d18c6c8569aee diff --git a/mango-cli/src/main/webapp/stylesheets/main.css b/mango-cli/src/main/webapp/stylesheets/main.css deleted file mode 100644 index 39935da54..000000000 --- a/mango-cli/src/main/webapp/stylesheets/main.css +++ /dev/null @@ -1,310 +0,0 @@ -#navbar .nav-tabs .dropdown-menu, #navbar .nav-pills .dropdown-menu, #navbar .navbar .dropdown-menu { - margin-top: 0; } -#navbar .dropdown-menu li { - width: 100%; } -#navbar .dropdown-menu li:hover { - color: white; - background-color: #4682B4 !important; } -#navbar .dropdown:hover .dropdown-menu { - display: block; } -#navbar li label { - width: 100%; - font-weight: 300; } -#navbar input[type=checkbox] { - display: none; } -#navbar input[type=checkbox]:checked + li { - background: #5BC0DE; } - -.chrChart { - width: 100%; - margin-left: auto; - margin-right: auto; } - .chrChart div { - background-color: steelblue; - text-align: left; - padding: 1px 1px 1px 10px; - margin: 1px; - color: white; } - -.absolute { - position: absolute; } - -.alignmentData { - margin-top: 3px; - padding-top: 3px; - border-top: 1px solid black; } - -.mergedReads { - border-bottom: 1px solid #e7e7e7; - margin-bottom: 5px; } - -body { - font-family: 'Open Sans', sans-serif; - font-weight: 300; } - -h1 { - text-align: center; - margin: 10px 0 0 0; - color: #333333; - font-weight: 400; - text-transform: uppercase; } - -h2 { - text-align: center; - margin: 0 0 10px 0; - color: #999999; - font-weight: 400; } - -.highlight { - font-weight: 700; } - -ul { - list-style-type: none; - margin: 0; - padding: 0 5px 0 5px; - float: left; } - -li { - display: inline; - float: left; - padding: 0 0 0 5px; } - -.axis path, .axis line { - fill: none; - stroke: black; - shape-rendering: crispEdges; } -.axis text { - font-family: sans-serif; - font-size: 11px; } - -div.tooltip { - position: absolute; - text-align: center; - padding: 2px; - background: #999999; - border: 0px; - pointer-events: none; - color: white; - margin-top: 70px; } - div.tooltip p { - margin: 0px; } - -input[type="text"] { - height: 22px; - padding: 4px 12px; } - -.checkbox-inline { - margin-left: 15px; } - -#refVis { - height: 180px; - position: relative; - width: 180px; } - -.chart div { - font: 13px sans-serif; - text-align: center; - padding: 3px 3px 3px 10px; - margin: 1px; - height: 20px; - border: 1px solid black; } - -.refVistooltip { - background: #eee; - box-shadow: 0 0 5px #999999; - color: #333; - display: none; - font-size: 12px; - left: 50px; - padding: 10px; - position: absolute; - text-align: center; - top: 60px; - width: 80px; - z-index: 999999; } - -/* -.main { - 
position: relative; - margin-right: 320px; - margin-left: 10px; - margin-top: 10px; - margin-bottom: 10px; - border-radius:5px; - padding: 20px; -}*/ -h2.panel { - text-align: center; - color: #180E3A; - font-weight: 500; - width: 0; - margin-left: -10px; - margin-right: 0px; - margin-top: 0px; - border-radius: 0px; - float: center; } - -.panel { - background: #ccc; - margin-right: 320px; - margin-left: 10px; - margin-top: 10px; - margin-bottom: 10px; - border-radius: 5px; - padding: 20px; - width: 260px; - float: left; - z-index: 99999; } - -#refArea { - border-radius: 5px; - border: 1.5px solid #8AC007; } - -#varArea { - border-radius: 5px; - border: 1.5px solid #FF9933; } - -#featArea { - border-radius: 5px; - border: 1.5px solid #FF99CC; } - -#readsArea { - padding: 0px; } - -#pileup { - position: absolute; - left: 10px; - right: 10px; - top: 100px; - bottom: 10px; } - -.graphArea { - padding: 3px; } - -.sampleSummary { - height: 100px; } - -.summary-svg { - overflow: visible !important; } - -.samples { - border-radius: 5px; - border: 1.5px solid #996633; - padding: 3px; } - .samples .col-md-10 { - overflow-x: hidden; } - -.sampleCoverage { - border-bottom: 1px solid #e7e7e7; - margin-bottom: 5px; - padding-bottom: 5px; } - -.ui-resizable-handle { - position: absolute; - font-size: 0.1px; - z-index: 99999; - display: block; } - -.ui-resizable { - margin-bottom: 30px; } - -.ui-resizable-s { - background: linear-gradient(#8A8A8A, #2F2F2F); - border-top: 1px solid #111; - border-bottom: 1px solid #111; - box-shadow: inset 0 1px 0 #6e6e6e, 0 2px 2px rgba(0, 0, 0, 0.4); - height: 10px; - bottom: 0px; } - -.ui-resizable-s:after { - content: "-"; - display: block; - position: absolute; - top: 0; - left: calc(50% - 12px); - height: 2px; - width: 25px; - background-color: rgba(0, 0, 0, 0.4); - margin: 3px; } - -.col-md-10 { - border-left: 1px solid #e7e7e7; } - -#searchfield { - display: block; - width: 100%; - text-align: center; - margin-bottom: 35px; } - -#searchfield form { - display: inline-block; - background: #eeefed; - padding: 0; - margin: 0; - /*padding: 5px;*/ - /*border-radius: 3px;*/ - margin: 5px 0 0 0; } - -#searchfield form .biginput { - height: 22px; - padding: 4px 12px; } - -#searchfield form .biginput:focus { - color: #858585; } - -.flatbtn { - -webkit-box-sizing: border-box; - -moz-box-sizing: border-box; - box-sizing: border-box; - display: inline-block; - outline: 0; - border: 0; - color: #f3faef; - text-decoration: none; - background-color: #6bb642; - border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25); - font-size: 1.2em; - font-weight: bold; - padding: 12px 22px 12px 22px; - line-height: normal; - text-align: center; - vertical-align: middle; - cursor: pointer; - text-transform: uppercase; - text-shadow: 0 1px 0 rgba(0, 0, 0, 0.3); - -webkit-border-radius: 3px; - -moz-border-radius: 3px; - border-radius: 3px; - -webkit-box-shadow: 0 1px 0 rgba(15, 15, 15, 0.3); - -moz-box-shadow: 0 1px 0 rgba(15, 15, 15, 0.3); - box-shadow: 0 1px 0 rgba(15, 15, 15, 0.3); } - -.flatbtn:hover { - color: #fff; - background-color: #73c437; } - -.flatbtn:active { - -webkit-box-shadow: inset 0 1px 5px rgba(0, 0, 0, 0.1); - -moz-box-shadow: inset 0 1px 5px rgba(0, 0, 0, 0.1); - box-shadow: inset 0 1px 5px rgba(0, 0, 0, 0.1); } - -.autocomplete-suggestions { - border: 1px solid #999; - background: #fff; - cursor: default; - overflow: auto; } - -.autocomplete-suggestion { - padding: 10px 5px; - font-size: 1.2em; - white-space: nowrap; - overflow: hidden; } - 
-.autocomplete-selected { - background: #f0f0f0; } - -.autocomplete-suggestions strong { - font-weight: normal; - color: #3399ff; } - -/*# sourceMappingURL=main.css.map */ diff --git a/mango-cli/src/main/webapp/stylesheets/main.scss b/mango-cli/src/main/webapp/stylesheets/main.scss deleted file mode 100644 index ac48ba1d5..000000000 --- a/mango-cli/src/main/webapp/stylesheets/main.scss +++ /dev/null @@ -1,319 +0,0 @@ -@import 'modules/_colors'; -@import 'partials/_menu'; - -.chrChart { - width: 100%; - margin-left: auto; - margin-right: auto; - - div { - background-color: steelblue; - text-align: left; - padding: 1px 1px 1px 10px; - margin: 1px; - color: white; - } -} - -.absolute { - position: absolute; -} - -.alignmentData { - margin-top:3px; - padding-top: 3px; - border-top: 1px solid black; -} - -.mergedReads { - border-bottom: 1px solid $covBorder; - margin-bottom: 5px; -} - -body { - font-family: 'Open Sans', sans-serif; - font-weight:300; -} - -h1 { - text-align: center; - margin: 10px 0 0 0; - color: $dark-grey; - font-weight:400; - text-transform: uppercase; -} - -h2 { - text-align: center; - margin: 0 0 10px 0; - color: $med-grey; - font-weight:400; -} - -.highlight { - font-weight:700; -} - -ul { - list-style-type: none; - margin: 0; - padding: 0 5px 0 5px; - float: left; -} -li { - display: inline; - float: left; - padding: 0 0 0 5px; -} -.axis { - path, line { - fill: none; - stroke: black; - shape-rendering: crispEdges; - } - - text { - font-family: sans-serif; - font-size: 11px; - } -} - -div.tooltip { - position: absolute; - text-align: center; - padding: 2px; - background: $med-grey; - border: 0px; - pointer-events: none; - color: white; - margin-top:70px; - p { - margin: 0px; - } -} - -input[type="text"] { - height: 22px; - padding: 4px 12px; -} - -.checkbox-inline { - margin-left:15px; -} - -#refVis { - height: 180px; - position: relative; - width: 180px; -} - -.chart div { - font: 13px sans-serif; - text-align: center; - padding: 3px 3px 3px 10px; - margin: 1px; - height: 20px; - border: 1px solid black; -} - -.refVistooltip { - background: #eee; - box-shadow: 0 0 5px #999999; - color: #333; - display: none; - font-size: 12px; - left: 50px; - padding: 10px; - position: absolute; - text-align: center; - top: 60px; - width: 80px; - z-index: 999999; -} - -/* -.main { - position: relative; - margin-right: 320px; - margin-left: 10px; - margin-top: 10px; - margin-bottom: 10px; - border-radius:5px; - padding: 20px; -}*/ - -h2.panel { - text-align: center; - color: #180E3A; - font-weight: 500; - width: 0; - margin-left: -10px; - margin-right: 0px; - margin-top: 0px; - border-radius: 0px; - float: center; } -.panel { - background: #ccc; - margin-right: 320px; - margin-left: 10px; - margin-top: 10px; - margin-bottom: 10px; - border-radius: 5px; - padding: 20px; - width: 260px; - float:left; - z-index: 99999;} - - -#refArea { - border-radius: 5px; - border: 1.5px solid $refBorder; -} - -#varArea { - border-radius: 5px; - border: 1.5px solid $varBorder; -} - -#featArea { - border-radius: 5px; - border: 1.5px solid $featBorder; -} - -#readsArea { - padding: 0px; -} - -#pileup { - position: absolute; - left: 10px; - right: 10px; - top: 100px; - bottom: 10px; -} - -.graphArea { - padding: 3px; -} - -.sampleSummary { - height: 100px; -} - -.summary-svg { - overflow: visible !important; -} - -.samples { - border-radius: 5px; - border: 1.5px solid $readBorder; - padding: 3px; - - .col-md-10 { - overflow-x: hidden; - } -} - -.sampleCoverage { - border-bottom: 1px solid $covBorder; - 
margin-bottom: 5px; - padding-bottom: 5px; -} - -.ui-resizable-handle { - position: absolute; - font-size: 0.1px; - z-index: 99999; - display: block; -} - -.ui-resizable { - margin-bottom: 30px -} - -.ui-resizable-s { - background: linear-gradient(#8A8A8A, #2F2F2F); - border-top: 1px solid #111; - border-bottom: 1px solid #111; - box-shadow: inset 0 1px 0 #6e6e6e,0 2px 2px rgba(0,0,0,0.4); - height:10px; - bottom: 0px; -} - -.ui-resizable-s:after { - content: "-"; - display: block; - position: absolute; - top: 0; - left: calc(50% - 12px); - height: 2px; - width: 25px; - background-color:rgba(0,0,0,0.4); - margin:3px; -} - -.col-md-10 { - border-left: 1px solid $covBorder; -} - -#searchfield { display: block; width: 100%; text-align: center; margin-bottom: 35px; } - -#searchfield form { - display: inline-block; - background: #eeefed; - padding: 0; - margin: 0; - /*padding: 5px;*/ - /*border-radius: 3px;*/ - margin: 5px 0 0 0; -} -#searchfield form .biginput { - height: 22px; - padding: 4px 12px; -} -#searchfield form .biginput:focus { - color: #858585; -} - -.flatbtn { - -webkit-box-sizing: border-box; - -moz-box-sizing: border-box; - box-sizing: border-box; - display: inline-block; - outline: 0; - border: 0; - color: #f3faef; - text-decoration: none; - background-color: #6bb642; - border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25); - font-size: 1.2em; - font-weight: bold; - padding: 12px 22px 12px 22px; - line-height: normal; - text-align: center; - vertical-align: middle; - cursor: pointer; - text-transform: uppercase; - text-shadow: 0 1px 0 rgba(0,0,0,0.3); - -webkit-border-radius: 3px; - -moz-border-radius: 3px; - border-radius: 3px; - -webkit-box-shadow: 0 1px 0 rgba(15, 15, 15, 0.3); - -moz-box-shadow: 0 1px 0 rgba(15, 15, 15, 0.3); - box-shadow: 0 1px 0 rgba(15, 15, 15, 0.3); -} -.flatbtn:hover { - color: #fff; - background-color: #73c437; -} -.flatbtn:active { - -webkit-box-shadow: inset 0 1px 5px rgba(0, 0, 0, 0.1); - -moz-box-shadow:inset 0 1px 5px rgba(0, 0, 0, 0.1); - box-shadow:inset 0 1px 5px rgba(0, 0, 0, 0.1); -} - -.autocomplete-suggestions { border: 1px solid #999; background: #fff; cursor: default; overflow: auto; } -.autocomplete-suggestion { padding: 10px 5px; font-size: 1.2em; white-space: nowrap; overflow: hidden; } -.autocomplete-selected { background: #f0f0f0; } -.autocomplete-suggestions strong { font-weight: normal; color: #3399ff; } - diff --git a/mango-cli/src/main/webapp/stylesheets/modules/_colors.scss b/mango-cli/src/main/webapp/stylesheets/modules/_colors.scss deleted file mode 100644 index 6b0966ee9..000000000 --- a/mango-cli/src/main/webapp/stylesheets/modules/_colors.scss +++ /dev/null @@ -1,12 +0,0 @@ -$viewSelected: #5BC0DE; -$liHover: #4682B4; - -// Section Borders -$refBorder: #8AC007; -$varBorder: #FF9933; -$featBorder: #FF99CC; -$readBorder: #996633; -$covBorder: #e7e7e7; - -$dark-grey: #333333; -$med-grey: #999999; diff --git a/mango-cli/src/main/webapp/stylesheets/partials/_menu.scss b/mango-cli/src/main/webapp/stylesheets/partials/_menu.scss deleted file mode 100644 index d9c71ff82..000000000 --- a/mango-cli/src/main/webapp/stylesheets/partials/_menu.scss +++ /dev/null @@ -1,37 +0,0 @@ -#navbar { - .nav-tabs .dropdown-menu, .nav-pills .dropdown-menu, .navbar .dropdown-menu { - margin-top: 0; - } - - .dropdown-menu { - li { - width: 100%; - } - - li:hover { - color: white; - background-color: $liHover !important; - } - } - - .dropdown { - &:hover .dropdown-menu { - display: block; - } - } - - li { - label { - width: 100%; - 
font-weight: 300; - } - } - - input[type=checkbox] { - display: none; - } - - input[type=checkbox]:checked + li { - background: $viewSelected; - } -} diff --git a/mango-cli/src/test/scala/org/bdgenomics/mango/cli/VizReadsSuite.scala b/mango-cli/src/test/scala/org/bdgenomics/mango/cli/VizReadsSuite.scala deleted file mode 100644 index 70d83c3e0..000000000 --- a/mango-cli/src/test/scala/org/bdgenomics/mango/cli/VizReadsSuite.scala +++ /dev/null @@ -1,142 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.mango.cli - -import net.liftweb.json._ -import org.bdgenomics.mango.layout._ -import org.bdgenomics.mango.models.LazyMaterialization -import org.bdgenomics.mango.util.MangoFunSuite -import org.scalatra.{ RequestEntityTooLarge, Ok } -import org.scalatra.test.scalatest.ScalatraSuite - -class VizReadsSuite extends MangoFunSuite with ScalatraSuite { - - implicit val formats = DefaultFormats - addServlet(classOf[VizServlet], "/*") - - val bamFile = ClassLoader.getSystemClassLoader.getResource("mouse_chrM.bam").getFile - val referenceFile = ClassLoader.getSystemClassLoader.getResource("mm10_chrM.fa").getFile - val vcfFile = ClassLoader.getSystemClassLoader.getResource("truetest.genotypes.vcf").getFile - val featureFile = ClassLoader.getSystemClassLoader.getResource("smalltest.bed").getFile - val coverageFile = ClassLoader.getSystemClassLoader.getResource("mouse_chrM.coverage.adam").getFile - - val bamKey = LazyMaterialization.filterKeyFromFile(bamFile) - val featureKey = LazyMaterialization.filterKeyFromFile(featureFile) - val vcfKey = LazyMaterialization.filterKeyFromFile(vcfFile) - val coverageKey = LazyMaterialization.filterKeyFromFile(coverageFile) - - val args = new VizReadsArgs() - args.readsPaths = bamFile - args.referencePath = referenceFile - args.variantsPaths = vcfFile - args.featurePaths = featureFile - args.testMode = true - - sparkTest("Should pass for discovery mode") { - val args = new VizReadsArgs() - args.discoveryMode = true - args.referencePath = referenceFile - args.featurePaths = featureFile - args.variantsPaths = vcfFile - args.testMode = true - - implicit val vizReads = runVizReads(args) - get(s"/features/${featureKey}/chrM?start=0&end=2000") { - assert(status == Ok("").status.code) - } - } - - sparkTest("/reference/:ref") { - implicit val VizReads = runVizReads(args) - // should return data - get("/reference/chrM?start=1&end=100") { - assert(status == Ok("").status.code) - val ref = parse(response.getContent()).extract[String] - assert(ref.length == 99) - } - } - - sparkTest("/reads/:key/:ref") { - implicit val VizReads = runVizReads(args) - get(s"/reads/${bamKey}/chrM?start=0&end=100") { - assert(status == Ok("").status.code) - } - } - - sparkTest("/reads/coverage/:key/:ref") { - implicit val VizReads 
= runVizReads(args) - get(s"/reads/coverage/${bamKey}/chrM?start=1&end=100") { - assert(status == Ok("").status.code) - val json = parse(response.getContent()).extract[Array[PositionCount]] - assert(json.length == 99) - } - } - - sparkTest("/variants/:key/:ref") { - val args = new VizReadsArgs() - args.referencePath = referenceFile - args.variantsPaths = vcfFile - args.testMode = true - args.showGenotypes = true - - implicit val VizReads = runVizReads(args) - get(s"/variants/${vcfKey}/chrM?start=0&end=100") { - assert(status == Ok("").status.code) - val json = parse(response.getContent()).extract[Array[String]].map(r => GenotypeJson(r)) - .sortBy(_.variant.getStart) - assert(json.length == 3) - assert(json.head.variant.getStart == 19) - assert(json.head.sampleIds.length == 2) - } - } - - sparkTest("does not return genotypes when binned") { - implicit val VizReads = runVizReads(args) - get(s"/variants/${vcfKey}/chrM?start=0&end=100&binning=100") { - assert(status == Ok("").status.code) - val json = parse(response.getContent()).extract[Array[String]].map(r => GenotypeJson(r)) - .sortBy(_.variant.getStart) - assert(json.length == 1) - assert(json.head.sampleIds.length == 0) - } - } - - sparkTest("/features/:key/:ref") { - implicit val vizReads = runVizReads(args) - get(s"/features/${featureKey}/chrM?start=0&end=1200") { - assert(status == Ok("").status.code) - val json = parse(response.getContent()).extract[Array[BedRowJson]] - assert(json.length == 2) - } - } - - sparkTest("/coverage/:key/:ref") { - val args = new VizReadsArgs() - args.referencePath = referenceFile - args.coveragePaths = coverageFile - args.testMode = true - - implicit val vizReads = runVizReads(args) - get(s"/coverage/${coverageKey}/chrM?start=0&end=1200") { - assert(status == Ok("").status.code) - val json = parse(response.getContent()).extract[Array[PositionCount]] - assert(json.map(_.start).distinct.length == 1200) - } - } - -} diff --git a/mango-core/pom.xml b/mango-core/pom.xml index 134a712a7..849af14f3 100644 --- a/mango-core/pom.xml +++ b/mango-core/pom.xml @@ -162,15 +162,6 @@ scalatest_${scala.version.prefix} test - - org.scalatra - scalatra_${scala.version.prefix} - compile - - - org.scalatra.scalate - scalate-core_${scala.version.prefix} - org.seqdoop hadoop-bam diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/layout/Layout.scala b/mango-core/src/main/scala/org/bdgenomics/mango/layout/Layout.scala index f555bca2b..ac26f07fb 100644 --- a/mango-core/src/main/scala/org/bdgenomics/mango/layout/Layout.scala +++ b/mango-core/src/main/scala/org/bdgenomics/mango/layout/Layout.scala @@ -1,4 +1,3 @@ - /** * Licensed to Big Data Genomics (BDG) under one * or more contributor license agreements. See the NOTICE file @@ -20,6 +19,7 @@ package org.bdgenomics.mango.layout import net.liftweb.json.Serialization.write import net.liftweb.json._ +import org.bdgenomics.adam.models.ReferenceRegion import org.bdgenomics.formats.avro.Variant /** @@ -66,6 +66,16 @@ case class GenotypeJson(variant: Variant, sampleIds: Array[String]) { write(GenotypeString(VariantJson(variant), sampleIds))(formats) } + /** + * Checks whether this overlaps ReferenceRegion. 
+ * + * @param region ReferenceRegion to query for overlap + * @return Boolean whther GenotypeJson overlaps region + */ + def overlaps(region: ReferenceRegion): Boolean = { + val vRegion = ReferenceRegion(variant.getContigName, variant.getStart, variant.getEnd) + region.overlaps(vRegion) + } } /** @@ -96,7 +106,8 @@ object GenotypeJson { } /** - * Makes genotype json without genotype sample names + * Makes genotype json without genotype sample names. + * * @param variant Variant * @return GenotypeJson */ @@ -112,7 +123,29 @@ object GenotypeJson { * @param start start of feature region * @param stop end of feature region */ -case class BedRowJson(id: String, featureType: String, contig: String, start: Long, stop: Long, score: Int) +case class BedRowJson(id: String, featureType: String, contig: String, start: Long, stop: Long, score: Int) { + + /** + * Checks whether this overlaps ReferenceRegion. + * + * @param region ReferenceRegion to query for overlap + * @return Boolean whther BedRowJson overlaps region + */ + def overlaps(region: ReferenceRegion): Boolean = { + overlaps(this, region) + } + + /** + * Checks whether this overlaps ReferenceRegion. + * + * @param t BedRowJson to query overlap for + * @param region ReferenceRegion to query for overlap + * @return Boolean whther BedRowJson overlaps region + */ + def overlaps(t: BedRowJson, region: ReferenceRegion): Boolean = { + (t.contig == region.referenceName) && (t.start < region.end) && (t.stop > region.start) + } +} /** * Class for covertering adam coverage to coverage format readable by pileup.js @@ -120,5 +153,17 @@ case class BedRowJson(id: String, featureType: String, contig: String, start: Lo * @param end Base pair end chromosome * @param count Coverage at the specified base pair */ -case class PositionCount(start: Long, end: Long, count: Int) +case class PositionCount(contig: String, start: Long, end: Long, count: Int) { + + /** + * Checks whether this overlaps ReferenceRegion. 
+ * + * @param region ReferenceRegion to query for overlap + * @return Boolean whether PositionCount overlaps region + */ + def overlaps(region: ReferenceRegion): Boolean = { + val pRegion = ReferenceRegion(contig, start, end) + region.overlaps(pRegion) + } +} diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/models/AlignmentRecordMaterialization.scala b/mango-core/src/main/scala/org/bdgenomics/mango/models/AlignmentRecordMaterialization.scala index bf9b7ce40..e8b4f5139 100644 --- a/mango-core/src/main/scala/org/bdgenomics/mango/models/AlignmentRecordMaterialization.scala +++ b/mango-core/src/main/scala/org/bdgenomics/mango/models/AlignmentRecordMaterialization.scala @@ -21,8 +21,6 @@ import java.io.{ PrintWriter, StringWriter } import org.apache.hadoop.fs.Path import org.apache.parquet.filter2.dsl.Dsl._ -import org.apache.parquet.filter2.predicate.FilterPredicate -import org.apache.parquet.io.api.Binary import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ SequenceDictionary, ReferenceRegion } @@ -32,20 +30,23 @@ import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.mango.converters.GA4GHConverter import org.bdgenomics.mango.layout.PositionCount +import org.bdgenomics.mango.core.util.ResourceUtils import org.bdgenomics.utils.misc.Logging +import org.bdgenomics.utils.instrumentation.Metrics import org.ga4gh.{ GAReadAlignment, GASearchReadsResponse } import net.liftweb.json.Serialization._ import org.seqdoop.hadoop_bam.util.SAMHeaderReader import scala.collection.JavaConversions._ -import org.bdgenomics.utils.instrumentation.Metrics // metric variables object AlignmentTimers extends Metrics { - val loadADAMData = timer("LOAD alignments from parquet") - val loadBAMData = timer("LOAD alignments from BAM files") + val loadADAMData = timer("load alignments from parquet") + val loadBAMData = timer("load alignments from BAM files") val getCoverageData = timer("get coverage data from IntervalRDD") val getAlignmentData = timer("get alignment data from IntervalRDD") val convertToGaReads = timer("convert parquet alignments to GA4GH Reads") + val collect = timer("collect alignments") + val toJson = timer("convert alignments to json") } /** @@ -59,13 +60,11 @@ object AlignmentTimers extends Metrics { class AlignmentRecordMaterialization(@transient sc: SparkContext, files: List[String], sd: SequenceDictionary, - prefetchSize: Option[Int] = None) - extends LazyMaterialization[AlignmentRecord]("AlignmentRecordRDD", sc, files, sd, prefetchSize) + prefetchSize: Option[Long] = None) + extends LazyMaterialization[AlignmentRecord, GAReadAlignment](AlignmentRecordMaterialization.name, sc, files, sd, prefetchSize) with Serializable with Logging { - @transient implicit val formats = net.liftweb.json.DefaultFormats - - def load = (file: String, region: Option[ReferenceRegion]) => AlignmentRecordMaterialization.load(sc, file, region).rdd + def load = (file: String, regions: Option[Iterable[ReferenceRegion]]) => AlignmentRecordMaterialization.load(sc, file, regions).rdd /** * Extracts ReferenceRegion from AlignmentRecord @@ -98,71 +97,76 @@ class AlignmentRecordMaterialization(@transient sc: SparkContext, AlignmentTimers.getCoverageData.time { val covCounts: RDD[(String, PositionCount)] = - get(region) + get(Some(region)) .flatMap(r => { val t: List[Long] = List.range(r._2.getStart, r._2.getEnd) t.map(n => ((ReferenceRegion(r._2.getContigName, n, n + 1), r._1), 1))
.filter(_._1._1.overlaps(region)) // filter out read fragments not overlapping region }).reduceByKey(_ + _) // reduce coverage by combining adjacent frequenct - .map(r => (r._1._2, PositionCount(r._1._1.start, r._1._1.start + 1, r._2))) + .map(r => (r._1._2, PositionCount(r._1._1.referenceName, r._1._1.start, r._1._1.start + 1, r._2))) covCounts.collect.groupBy(_._1) // group by sample Id .map(r => (r._1, write(r._2.map(_._2)))) } } + def toCoverage(arr: Array[GAReadAlignment], region: ReferenceRegion): Array[PositionCount] = { + arr.flatMap(r => { + val t: List[Long] = List.range(r.getAlignment.getPosition.getPosition, r.getAlignment.getPosition.getPosition + r.getAlignedSequence.length) + t.map(n => (ReferenceRegion(r.getAlignment.getPosition.getReferenceName, n, n + 1), 1)) + .filter(_._1.overlaps(region)) // filter out read fragments not overlapping region + }).groupBy(_._1).map(r => (r._1, r._2.map(_._2).sum)) // reduce coverage by combining adjacent frequencies + .map(r => PositionCount(r._1.referenceName, r._1.start, r._1.end, r._2)).toArray + } + /** * Formats raw data from KLayeredTile to JSON. This is requied by KTiles * @param data RDD of (id, AlignmentRecord) tuples * @return JSONified data */ - def stringify(data: RDD[(String, AlignmentRecord)]): Map[String, String] = { - val flattened: Map[String, Array[AlignmentRecord]] = + override def toJson(data: RDD[(String, AlignmentRecord)]): Map[String, Array[GAReadAlignment]] = { + AlignmentTimers.collect.time { AlignmentTimers.getAlignmentData.time { - data - .filter(r => r._2.getMapq > 0) - .collect - .groupBy(_._1) - .map(r => (r._1, r._2.map(_._2))) + data.mapValues(r => Array(GA4GHConverter.toGAReadAlignment(r))) + .reduceByKeyLocally(_ ++ _).toMap } - - AlignmentTimers.convertToGaReads.time { - - val gaReads: Map[String, List[GAReadAlignment]] = flattened.mapValues(l => l.map(r => GA4GHConverter.toGAReadAlignment(r)).toList) - - gaReads.mapValues(v => { - GASearchReadsResponse.newBuilder() - .setAlignments(v) - .build().toString - }) } } + + /** + * Formats raw data from KLayeredTile to JSON. This is required by KTiles + * @param data Array of GAReadAlignment + * @return JSONified data + */ + override def stringify(data: Array[GAReadAlignment]): String = { + GASearchReadsResponse.newBuilder() + .setAlignments(data.toList) + .build().toString + } } object AlignmentRecordMaterialization extends Logging { - def apply(sc: SparkContext, files: List[String], sd: SequenceDictionary): AlignmentRecordMaterialization = { - new AlignmentRecordMaterialization(sc, files, sd) - } + val name = "AlignmentRecord" /** * Loads alignment data from bam, sam and ADAM file formats * @param sc SparkContext - * @param region Region to load + * @param regions Iterable of ReferenceRegions to load * @param fp filepath to load from * @return RDD of data from the file over specified ReferenceRegion */ - def load(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): AlignmentRecordRDD = { - if (fp.endsWith(".adam")) loadAdam(sc, fp, region) + def load(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): AlignmentRecordRDD = { + if (fp.endsWith(".adam")) loadAdam(sc, fp, regions) else { try { - AlignmentRecordMaterialization.loadFromBam(sc, fp, region) + AlignmentRecordMaterialization.loadFromBam(sc, fp, regions) .transform(rdd => rdd.filter(_.getReadMapped)) } catch { case e: Exception => { val sw = new StringWriter e.printStackTrace(new PrintWriter(sw)) - throw UnsupportedFileException("bam index not provided.
Stack trace: " + sw.toString) + throw UnsupportedFileException(s"bam index not provided for file ${fp}. Stack trace: " + sw.toString) } } } @@ -171,28 +175,23 @@ object AlignmentRecordMaterialization extends Logging { /** * Loads data from bam files (indexed or unindexed) from persistent storage * @param sc SparkContext - * @param region Region to load + * @param regions Iterable of ReferenceRegions to load * @param fp filepath to load from * @return RDD of data from the file over specified ReferenceRegion */ - def loadFromBam(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): AlignmentRecordRDD = { + def loadFromBam(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): AlignmentRecordRDD = { AlignmentTimers.loadBAMData.time { - region match { - case Some(_) => - val regions = LazyMaterialization.getContigPredicate(region.get) - var alignments: AlignmentRecordRDD = null - // hack to get around issue in hadoop_bam, which throws error if contigName is not found in bam file - val path = new Path(fp) - val fileSd = SequenceDictionary(SAMHeaderReader.readSAMHeaderFrom(path, sc.hadoopConfiguration)) - for (r <- List(regions._1, regions._2)) { - if (fileSd.containsRefName(r.referenceName)) { - val x = sc.loadIndexedBam(fp, r) - if (alignments == null) alignments = x - else alignments = alignments.transform(rdd => rdd.union(x.rdd)) - } - } - alignments - case _ => sc.loadBam(fp) + if (regions.isDefined) { + // hack to get around issue in hadoop_bam, which throws error if contigName is not found in bam file + val path = new Path(fp) + val fileSd = SequenceDictionary(SAMHeaderReader.readSAMHeaderFrom(path, sc.hadoopConfiguration)) + val predicateRegions: Iterable[ReferenceRegion] = regions.get + .flatMap(r => { + LazyMaterialization.getContigPredicate(r) + }).filter(r => fileSd.containsRefName(r.referenceName)) + sc.loadIndexedBam(fp, predicateRegions) + } else { + sc.loadBam(fp) } } } @@ -200,27 +199,23 @@ object AlignmentRecordMaterialization extends Logging { /** * Loads ADAM data using predicate pushdowns * @param sc SparkContext - * @param region Region to load + * @param regions Iterable of ReferenceRegions to load * @param fp filepath to load from * @return RDD of data from the file over specified ReferenceRegion */ - def loadAdam(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): AlignmentRecordRDD = { - AlignmentTimers.loadADAMData.time { - val pred: Option[FilterPredicate] = - region match { - case Some(_) => { - val contigs = LazyMaterialization.getContigPredicate(region.get) - Some((LongColumn("end") >= region.get.start) && (LongColumn("start") <= region.get.end) && - (BinaryColumn("contigName") === Binary.fromString(contigs._1.referenceName) || - BinaryColumn("contigName") === Binary.fromString(contigs._2.referenceName)) && - (BooleanColumn("readMapped") === true)) - } case None => None - } - val proj = Projection(AlignmentRecordField.contigName, AlignmentRecordField.mapq, AlignmentRecordField.readName, - AlignmentRecordField.start, AlignmentRecordField.readMapped, AlignmentRecordField.recordGroupName, - AlignmentRecordField.end, AlignmentRecordField.sequence, AlignmentRecordField.cigar, AlignmentRecordField.readNegativeStrand, - AlignmentRecordField.readPaired, AlignmentRecordField.recordGroupSample) - sc.loadParquetAlignments(fp, predicate = pred, projection = Some(proj)) - } + def loadAdam(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): AlignmentRecordRDD = { + val pred = + if (regions.isDefined) { + val 
prefixRegions: Iterable[ReferenceRegion] = regions.get.map(r => LazyMaterialization.getContigPredicate(r)).flatten + Some(ResourceUtils.formReferenceRegionPredicate(prefixRegions) && (BooleanColumn("readMapped") === true) && (IntColumn("mapq") > 0)) + } else { + Some((BooleanColumn("readMapped") === true) && (IntColumn("mapq") > 0)) + } + + val proj = Projection(AlignmentRecordField.contigName, AlignmentRecordField.mapq, AlignmentRecordField.readName, + AlignmentRecordField.start, AlignmentRecordField.readMapped, AlignmentRecordField.recordGroupName, + AlignmentRecordField.end, AlignmentRecordField.sequence, AlignmentRecordField.cigar, AlignmentRecordField.readNegativeStrand, + AlignmentRecordField.readPaired, AlignmentRecordField.recordGroupSample) + sc.loadParquetAlignments(fp, predicate = pred, projection = Some(proj)) } } diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/models/CoverageMaterialization.scala b/mango-core/src/main/scala/org/bdgenomics/mango/models/CoverageMaterialization.scala index 1b56d89a5..01a1774f8 100644 --- a/mango-core/src/main/scala/org/bdgenomics/mango/models/CoverageMaterialization.scala +++ b/mango-core/src/main/scala/org/bdgenomics/mango/models/CoverageMaterialization.scala @@ -18,22 +18,20 @@ package org.bdgenomics.mango.models import java.io.{ PrintWriter, StringWriter } -import net.liftweb.json.Serialization.write -import org.apache.parquet.filter2.predicate.FilterPredicate import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD -import org.apache.parquet.filter2.dsl.Dsl._ import org.bdgenomics.adam.models.{ Coverage, ReferenceRegion, SequenceDictionary } import org.bdgenomics.adam.projections.{ Projection } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.feature.CoverageRDD +import org.bdgenomics.mango.core.util.ResourceUtils import org.bdgenomics.mango.layout.PositionCount import org.bdgenomics.utils.misc.Logging /** * - * @param s SparkContext - * @param dict Sequence Dictionay calculated from reference + * @param sc SparkContext + * @param sd Sequence Dictionary calculated from reference * extends LazyMaterialization and KTiles * @see LazyMaterialization * @see KTiles @@ -41,13 +39,11 @@ import org.bdgenomics.utils.misc.Logging class CoverageMaterialization(@transient sc: SparkContext, files: List[String], sd: SequenceDictionary, - prefetchSize: Option[Int] = None) - extends LazyMaterialization[Coverage]("CoverageRDD", sc, files, sd, prefetchSize) + prefetchSize: Option[Long] = None) + extends LazyMaterialization[Coverage, PositionCount](CoverageMaterialization.name, sc, files, sd, prefetchSize) with Serializable with Logging { - @transient implicit val formats = net.liftweb.json.DefaultFormats - - def load = (file: String, region: Option[ReferenceRegion]) => CoverageMaterialization.load(sc, file, region).rdd + def load = (file: String, regions: Option[Iterable[ReferenceRegion]]) => CoverageMaterialization.load(sc, file, regions).rdd /** * Extracts ReferenceRegion from CoverageRecord @@ -75,8 +71,8 @@ class CoverageMaterialization(@transient sc: SparkContext, * @param binning Tells what granularity of coverage to return.
Used for large regions * @return JSONified data map */ - def getCoverage(region: ReferenceRegion, binning: Int = 1): Map[String, String] = { - val data: RDD[(String, Coverage)] = get(region) + override def getJson(region: ReferenceRegion, verbose: Boolean = false, binning: Int = 1): Map[String, Array[PositionCount]] = { + val data: RDD[(String, Coverage)] = get(Some(region)) val covCounts: RDD[(String, PositionCount)] = if (binning > 1) { @@ -85,37 +81,34 @@ class CoverageMaterialization(@transient sc: SparkContext, // map to bin start, bin end val start = r._1._2.start val end = Math.max(r._2.end, start + binning) - (r._1._1, PositionCount(start, end, r._2.count.toInt)) + (r._1._1, PositionCount(region.referenceName, start, end, r._2.count.toInt)) }) } else { - data.mapValues(r => PositionCount(r.start, r.end, r.count.toInt)) + data.mapValues(r => PositionCount(region.referenceName, r.start, r.end, r.count.toInt)) } covCounts.collect.groupBy(_._1) // group by sample Id - .mapValues(r => r.sortBy(_._2.start)) // sort coverage - .map(r => (r._1, write(r._2.map(_._2)))) + .mapValues(r => r.map(_._2).sortBy(_.start)) // sort coverage } + /** * Formats raw data from KLayeredTile to JSON. This is required by KTiles * * @param data RDD of (id, AlignmentRecord) tuples * @return JSONified data */ - def stringify(data: RDD[(String, Coverage)]): Map[String, String] = { - val flattened: Map[String, Array[PositionCount]] = data - .collect + def toJson(data: RDD[(String, Coverage)]): Map[String, Array[PositionCount]] = { + data.collect .groupBy(_._1) .map(r => (r._1, r._2.map(_._2))) - .mapValues(r => r.map(f => PositionCount(f.start, f.end, f.count.toInt))) - flattened.mapValues(r => write(r)) + .mapValues(r => r.map(f => PositionCount(f.contigName, f.start, f.end, f.count.toInt))) } + } object CoverageMaterialization { - def apply(sc: SparkContext, files: List[String], sd: SequenceDictionary): CoverageMaterialization = { - new CoverageMaterialization(sc, files, sd) - } + val name = "Coverage" /** * Loads alignment data from ADAM file formats @@ -124,11 +117,11 @@ object CoverageMaterialization { * @param fp filepath to load from * @return RDD of data from the file over specified ReferenceRegion */ - def load(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): CoverageRDD = { - if (fp.endsWith(".adam")) loadAdam(sc, fp, region) + def load(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): CoverageRDD = { + if (fp.endsWith(".adam")) loadAdam(sc, fp, regions) else { try { - FeatureMaterialization.loadData(sc, fp, region).toCoverage + FeatureMaterialization.loadData(sc, fp, regions).toCoverage } catch { case e: Exception => { val sw = new StringWriter @@ -143,18 +136,18 @@ object CoverageMaterialization { * * @param sc SparkContext * @param fp filepath to load from - * @param region Region to load + * @param regions Iterable of ReferenceRegions to load * @return CoverageRDD of data from the file over specified ReferenceRegion */ - def loadAdam(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): CoverageRDD = { - val pred: Option[FilterPredicate] = - region match { - case Some(_) => - val contigs = LazyMaterialization.getContigPredicate(region.get) - Some((LongColumn("end") <= region.get.end) && (LongColumn("start") >= region.get.start) && - (BinaryColumn("contigName") === contigs._1.referenceName || BinaryColumn("contigName") === contigs._2.referenceName)) - case None => None + def loadAdam(sc: SparkContext, fp: String, regions: 
Option[Iterable[ReferenceRegion]]): CoverageRDD = { + val pred = + if (regions.isDefined) { + val prefixRegions: Iterable[ReferenceRegion] = regions.get.map(r => LazyMaterialization.getContigPredicate(r)).flatten + Some(ResourceUtils.formReferenceRegionPredicate(prefixRegions)) + } else { + None } sc.loadParquetCoverage(fp, predicate = pred).flatten() } -} + +} \ No newline at end of file diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/models/FeatureMaterialization.scala b/mango-core/src/main/scala/org/bdgenomics/mango/models/FeatureMaterialization.scala index 5311790af..a5b27f995 100644 --- a/mango-core/src/main/scala/org/bdgenomics/mango/models/FeatureMaterialization.scala +++ b/mango-core/src/main/scala/org/bdgenomics/mango/models/FeatureMaterialization.scala @@ -17,9 +17,6 @@ */ package org.bdgenomics.mango.models -import net.liftweb.json.Serialization.write -import org.apache.parquet.filter2.dsl.Dsl._ -import org.apache.parquet.filter2.predicate.FilterPredicate import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary } @@ -27,7 +24,7 @@ import org.bdgenomics.adam.projections.{ FeatureField, Projection } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.feature.FeatureRDD import org.bdgenomics.formats.avro.Feature -import org.bdgenomics.mango.core.util.VizUtils +import org.bdgenomics.mango.core.util.{ ResourceUtils, VizUtils } import org.bdgenomics.mango.layout.BedRowJson import org.bdgenomics.utils.misc.Logging import java.io.{ StringWriter, PrintWriter } @@ -35,12 +32,10 @@ import java.io.{ StringWriter, PrintWriter } class FeatureMaterialization(@transient sc: SparkContext, files: List[String], sd: SequenceDictionary, - prefetchSize: Option[Int] = None) - extends LazyMaterialization[Feature]("FeatureRDD", sc, files, sd, prefetchSize) + prefetchSize: Option[Long] = None) + extends LazyMaterialization[Feature, BedRowJson](FeatureMaterialization.name, sc, files, sd, prefetchSize) with Serializable with Logging { - @transient implicit val formats = net.liftweb.json.DefaultFormats - /** * Extracts ReferenceRegion from Feature * @@ -49,7 +44,7 @@ class FeatureMaterialization(@transient sc: SparkContext, */ def getReferenceRegion = (f: Feature) => ReferenceRegion.unstranded(f) - def load = (file: String, region: Option[ReferenceRegion]) => FeatureMaterialization.load(sc, region, file).rdd + def load = (file: String, regions: Option[Iterable[ReferenceRegion]]) => FeatureMaterialization.load(sc, file, regions).rdd /** * Reset ReferenceName for Feature @@ -62,16 +57,15 @@ class FeatureMaterialization(@transient sc: SparkContext, f.setContigName(contig) f } - /** * Strinifies tuples of (sampleId, feature) to json * * @param data RDD (sampleId, Feature) * @return Map of (key, json) for the ReferenceRegion specified */ - def stringify(data: RDD[(String, Feature)]): Map[String, String] = { + def toJson(data: RDD[(String, Feature)]): Map[String, Array[BedRowJson]] = { - val flattened: Map[String, Array[BedRowJson]] = data + data .collect .groupBy(_._1) .map(r => (r._1, r._2.map(_._2))) @@ -85,8 +79,6 @@ class FeatureMaterialization(@transient sc: SparkContext, f.getContigName, f.getStart, f.getEnd, score) })) - - flattened.mapValues(r => write(r)) } /** @@ -96,8 +88,8 @@ class FeatureMaterialization(@transient sc: SparkContext, * @param binning Tells what granularity of coverage to return. 
Used for large regions * @return JSONified data map; */ - def getJson(region: ReferenceRegion, binning: Int = 1): Map[String, String] = { - val data = get(region) + override def getJson(region: ReferenceRegion, verbose: Boolean = false, binning: Int = 1): Map[String, Array[BedRowJson]] = { + val data = get(Some(region)) val binnedData = if (binning > 1) { @@ -114,25 +106,28 @@ class FeatureMaterialization(@transient sc: SparkContext, (r._1._1, binned) }) } else data - stringify(binnedData) + toJson(binnedData) } + } object FeatureMaterialization { + val name = "Feature" + /** * Loads feature data from bam, sam and ADAM file formats * * @param sc SparkContext - * @param region Region to load * @param fp filepath to load from + * @param regions Iterable of ReferenceRegion to load * @return RDD of data from the file over specified ReferenceRegion */ - def load(sc: SparkContext, region: Option[ReferenceRegion], fp: String): FeatureRDD = { - if (fp.endsWith(".adam")) FeatureMaterialization.loadAdam(sc, fp, region) + def load(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): FeatureRDD = { + if (fp.endsWith(".adam")) FeatureMaterialization.loadAdam(sc, fp, regions) else { try { - FeatureMaterialization.loadData(sc, fp, region) + FeatureMaterialization.loadData(sc, fp, regions) } catch { case e: Exception => { val sw = new StringWriter @@ -147,20 +142,23 @@ object FeatureMaterialization { * Loads data from bam files (indexed or unindexed) from persistent storage * * @param sc SparkContext - * @param region Region to load + * @param regions Iterable of ReferenceRegions to load * @param fp filepath to load from * @return RDD of data from the file over specified ReferenceRegion */ - def loadData(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): FeatureRDD = { - region match { - case Some(_) => - val contigs = LazyMaterialization.getContigPredicate(region.get) - val featureRdd = sc.loadFeatures(fp) - featureRdd.transform(rdd => rdd.rdd.filter(g => - (g.getContigName == contigs._1.referenceName || g.getContigName == contigs._2.referenceName) - && g.getStart < region.get.end - && g.getEnd > region.get.start)) - case None => sc.loadFeatures(fp) + def loadData(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): FeatureRDD = { + // if regions are specified, specifically load regions. 
Otherwise, load all data + if (regions.isDefined) { + val predicateRegions = regions.get + .flatMap(r => LazyMaterialization.getContigPredicate(r)) + .toArray + + sc.loadFeatures(fp) + .transform(rdd => rdd.rdd.filter(g => + !predicateRegions.filter(r => ReferenceRegion.unstranded(g).overlaps(r)).isEmpty)) + + } else { + sc.loadFeatures(fp) } } @@ -168,23 +166,22 @@ object FeatureMaterialization { * Loads ADAM data using predicate pushdowns * * @param sc SparkContext - * @param region Region to load + * @param regions Iterable of ReferenceRegion to load * @param fp filepath to load from * @return RDD of data from the file over specified ReferenceRegion */ - def loadAdam(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): FeatureRDD = { - val pred: Option[FilterPredicate] = - region match { - case Some(_) => - val contigs = LazyMaterialization.getContigPredicate(region.get) - Some((LongColumn("end") >= region.get.start) && (LongColumn("start") <= region.get.end) && - (BinaryColumn("contigName") === contigs._1.referenceName) || BinaryColumn("contigName") === contigs._2.referenceName) - case None => None + def loadAdam(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): FeatureRDD = { + val pred = + if (regions.isDefined) { + val predicateRegions: Iterable[ReferenceRegion] = regions.get + .flatMap(r => LazyMaterialization.getContigPredicate(r)) + Some(ResourceUtils.formReferenceRegionPredicate(predicateRegions)) + } else { + None } val proj = Projection(FeatureField.featureId, FeatureField.contigName, FeatureField.start, FeatureField.end, FeatureField.score, FeatureField.featureType) sc.loadParquetFeatures(fp, predicate = pred, projection = Some(proj)) } - -} \ No newline at end of file +} diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/models/LazyMaterialization.scala b/mango-core/src/main/scala/org/bdgenomics/mango/models/LazyMaterialization.scala index b180034e7..473fa9721 100644 --- a/mango-core/src/main/scala/org/bdgenomics/mango/models/LazyMaterialization.scala +++ b/mango-core/src/main/scala/org/bdgenomics/mango/models/LazyMaterialization.scala @@ -17,17 +17,26 @@ */ package org.bdgenomics.mango.models -import org.apache.spark.{ HashPartitioner, Partitioner, SparkContext } +import org.apache.spark.{ HashPartitioner, SparkContext } import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary } -import org.bdgenomics.adam.rdd.GenomicRegionPartitioner import org.bdgenomics.mango.util.Bookkeep import org.bdgenomics.utils.interval.rdd.IntervalRDD +import org.bdgenomics.utils.instrumentation.Metrics import org.bdgenomics.utils.misc.Logging - -import scala.collection.mutable.ListBuffer import scala.reflect.ClassTag +import net.liftweb.json.Serialization._ + +// metric variables +object LazyMaterializationTimers extends Metrics { + + def put = timer("put data in lazy materialization") + def get = timer("get data in lazy materialization") + def checkMemory = timer("check memory in lazy materialization") + def loadFiles = timer("load files in lazy materialization") + +} /** * Tracks regions of data already in memory and loads regions as needed. @@ -35,13 +44,14 @@ import scala.reflect.ClassTag * @param name Name of Materialization structure. Used for Spark UI. * @param prefetch prefetch size to lazily grab data. 
Defaults to 1000000 */ -abstract class LazyMaterialization[T: ClassTag](name: String, - @transient sc: SparkContext, - files: List[String], - sd: SequenceDictionary, - prefetch: Option[Int] = None) extends Serializable with Logging { +abstract class LazyMaterialization[T: ClassTag, S: ClassTag](name: String, + @transient sc: SparkContext, + files: List[String], + sd: SequenceDictionary, + prefetch: Option[Long] = None) extends Serializable with Logging { + @transient implicit val formats = net.liftweb.json.DefaultFormats - val prefetchSize = prefetch.getOrElse(10000) + val prefetchSize = prefetch.getOrElse(sd.records.map(_.length).max) val bookkeep = new Bookkeep(prefetchSize) var memoryFraction = 0.85 // default caching fraction @@ -50,13 +60,16 @@ abstract class LazyMaterialization[T: ClassTag](name: String, memoryFraction = fraction def getFiles: List[String] = files + + def getName: String = name + var intRDD: IntervalRDD[ReferenceRegion, (String, T)] = null /** * Used to generically load data from all file types * @return Generic RDD of data types from file */ - def load: (String, Option[ReferenceRegion]) => RDD[T] + def load: (String, Option[Iterable[ReferenceRegion]]) => RDD[T] /** * Extracts reference region from data type T @@ -70,20 +83,14 @@ abstract class LazyMaterialization[T: ClassTag](name: String, */ def setContigName: (T, String) => T + def stringify(data: Array[S]): String = write(data) + /** * Stringify T classtag to json * @param rdd RDD of elements keyed by String * @return Map of (key, json) for the ReferenceRegion specified */ - def stringify(rdd: RDD[(String, T)]): Map[String, String] - - /** - * Sets partitioner - * @return partitioner - */ - def setPartitioner: Partitioner = { - GenomicRegionPartitioner(sc.defaultParallelism, sd) - } + def toJson(rdd: RDD[(String, T)]): Map[String, Array[S]] /** * gets dictionary @@ -97,24 +104,11 @@ abstract class LazyMaterialization[T: ClassTag](name: String, * If data has yet been loaded, loads data within this region. * * @param region: ReferenceRegion to fetch + * @param verbose: Boolean for printing extra metrics in json. Ie, genotypes in VariantContext + * @param binning: Used for some Materialization structures, determines whether to bin data * @return Map of sampleIds and corresponding JSON */ - def getJson(region: ReferenceRegion): Map[String, String] = { - val seqRecord = sd(region.referenceName) - seqRecord match { - case Some(_) => { - val regionsOpt = bookkeep.getMissingRegions(region, files) - if (regionsOpt.isDefined) { - for (r <- regionsOpt.get) { - put(r) - } - } - stringify(intRDD.filterByInterval(region).toRDD.map(_._2)) - } case None => { - throw new Exception("Not found in dictionary") - } - } - } + def getJson(region: ReferenceRegion, verbose: Boolean = false, binning: Int = 1): Map[String, Array[S]] = toJson(get(Some(region))) /** * Bins region by binning size @@ -149,67 +143,69 @@ abstract class LazyMaterialization[T: ClassTag](name: String, * Filters all alignment data already loaded into the corresponding RDD that overlap a region. * If data has yet been loaded, loads data within this region. 
* - * @param region: ReferenceRegion to fetch + * @param regionOpt: ReferenceRegion to fetch * @return Map of sampleIds and corresponding JSON */ - def get(region: ReferenceRegion): RDD[(String, T)] = { - val seqRecord = sd(region.referenceName) - seqRecord match { - case Some(_) => { - val regionsOpt = bookkeep.getMissingRegions(region, files) - if (regionsOpt.isDefined) { - for (r <- regionsOpt.get) { - put(r) + def get(regionOpt: Option[ReferenceRegion] = None): RDD[(String, T)] = { + LazyMaterializationTimers.get.time { + regionOpt match { + case Some(_) => { + val region = regionOpt.get + val seqRecord = sd(region.referenceName) + seqRecord match { + case Some(_) => { + val missing = bookkeep.getMissingRegions(region, files).toIterable + if (!missing.isEmpty) { + put(missing) + } + intRDD.filterByInterval(region).toRDD.map(_._2) + } + case None => { + throw new Exception(s"${region} not found in dictionary") + } } } - intRDD.filterByInterval(region).toRDD.map(_._2) - } case None => { - throw new Exception(s"${region} not found in dictionary") + case None => { + val data = loadAllFiles(None) + + // tag entire sequence dictionary + bookkeep.rememberValues(sd, files) + + // we must repartition in case the data we are adding has no partitioner (i.e., empty RDD) + intRDD = partitionIntervalRDD(data) + intRDD.persist(StorageLevel.MEMORY_AND_DISK) + intRDD.setName(name) + intRDD.toRDD.map(_._2) + } } } } - def getAll(): RDD[T] = { - val hasChrPrefix = sd.records.head.name.startsWith("chr") - files.map(fp => load(fp, None)).reduce(_ union _) - .map(r => { - val region = LazyMaterialization.modifyChrPrefix(getReferenceRegion(r), hasChrPrefix) - setContigName(r, region.referenceName) - }) - } - /** * Transparent to the user, should only be called by get if IntervalRDD.get does not return data * Fetches the data from disk, using predicates and range filtering * Then puts fetched data in the IntervalRDD, and calls multiget again, now with the data existing * - * @param region ReferenceRegion in which data is retreived + * @param regions ReferenceRegions in which data is retrieved */ - def put(region: ReferenceRegion) = { - checkMemory - val seqRecord = sd(region.referenceName) - if (seqRecord.isDefined) { - - // do we need to modify the chromosome prefix?
- val hasChrPrefix = seqRecord.get.name.startsWith("chr") - - val data = - // get data for all samples - files.map(fp => { - val k = LazyMaterialization.filterKeyFromFile(fp) - load(fp, Some(region)).map(v => (k, v)) - }).reduce(_ union _).map(r => { - val region = LazyMaterialization.modifyChrPrefix(getReferenceRegion(r._2), hasChrPrefix) - (region, (r._1, setContigName(r._2, region.referenceName))) - }) + def put(regions: Iterable[ReferenceRegion]) = { + checkMemory() + + LazyMaterializationTimers.put.time { + + // filter out regions that are not found in the sequence dictionary + val filteredRegions = regions.filter(r => sd(r.referenceName).isDefined) + + val data = loadAllFiles(Some(regions)) // tag regions as found, even if there is no data - bookkeep.rememberValues(region, files) + filteredRegions.foreach(r => bookkeep.rememberValues(r, files)) // insert into IntervalRDD if there is data if (intRDD == null) { // we must repartition in case the data we are adding has no partitioner (i.e., empty RDD) - intRDD = IntervalRDD(data.partitionBy(new HashPartitioner(sc.defaultParallelism))) + intRDD = partitionIntervalRDD(data) + intRDD.persist(StorageLevel.MEMORY_AND_DISK) } else { val t = intRDD @@ -221,21 +217,66 @@ abstract class LazyMaterialization[T: ClassTag](name: String, } } + /** + * Repartitions data to default parallelism and returns as a repartitioned IntervalRDD + * + * @param data Keyed RDD of ReferenceRegion, (Keyvalue, data) pair + * @return new Interval RDD of repartitioned data + */ + private def partitionIntervalRDD(data: RDD[(ReferenceRegion, (String, T))]): IntervalRDD[ReferenceRegion, (String, T)] = { + if (data.getNumPartitions != sc.defaultParallelism) { + log.warn(s"Warning data partitioner of size ${data.getNumPartitions} " + + s"does not equal default of ${sc.defaultParallelism}. Repartitioning..") + IntervalRDD(data.partitionBy(new HashPartitioner(sc.defaultParallelism))) + } else { + IntervalRDD(data) + } + } + + /** + * Loads data from all files in materialization structure. + * + * @note: Modifies chromosome prefix depending on any discrepancies between the region requested and the + * sequence dictionary. + * + * @param regions Optional region to fetch. If none, fetches all data + * @return RDD of data. Primary index is ReferenceRegion and secondary index is filename. + */ + private def loadAllFiles(regions: Option[Iterable[ReferenceRegion]]): RDD[(ReferenceRegion, (String, T))] = { + // do we need to modify the chromosome prefix? + val hasChrPrefix = sd.records.head.name.startsWith("chr") + + LazyMaterializationTimers.loadFiles.time { + + // get data for all files + files.map(fp => { + val k = LazyMaterialization.filterKeyFromFile(fp) + load(fp, regions).map(v => (k, v)) + }).reduce(_ union _).map(r => { + val region = LazyMaterialization.modifyChrPrefix(getReferenceRegion(r._2), hasChrPrefix) + (region, (r._1, setContigName(r._2, region.referenceName))) + }) + } + } + /** * Checks memory across all executors * @return */ private def checkMemory() = { - val mem = sc.getExecutorMemoryStatus - val (total, available) = mem.map(_._2) - .reduce((e1, e2) => (e1._1 + e2._1, e1._2 + e2._2)) - val fraction: Double = (total - available).toFloat / total - - // if memory usage exceeds 85%, drop last viewed chromosome - if (fraction > memoryFraction) { - val dropped = bookkeep.dropValues() - log.warn(s"memory limit exceeded. 
Dropping ${dropped} from cache") - intRDD = intRDD.filter(_._1.referenceName != dropped) + LazyMaterializationTimers.checkMemory.time { + + val mem = sc.getExecutorMemoryStatus + val (total, available) = mem.map(_._2) + .reduce((e1, e2) => (e1._1 + e2._1, e1._2 + e2._2)) + val fraction: Double = (total - available).toFloat / total + + // if memory usage exceeds 85%, drop last viewed chromosome + if (fraction > memoryFraction) { + val dropped = bookkeep.dropValues() + log.warn(s"memory limit exceeded. Dropping ${dropped} from cache") + intRDD = intRDD.filter(_._1.referenceName != dropped) + } } } @@ -287,15 +328,24 @@ object LazyMaterialization { * should also search "20", and "20" should also trigger the search of "chr20". * * @param region ReferenceRegion to modify referenceName - * @return Tuple2 of ReferenceRegions, with and without the "chr" prefix + * @return Array of ReferenceRegions, with and without the "chr" prefix + */ + def getContigPredicate(region: ReferenceRegion): Array[ReferenceRegion] = { + getContigPredicate(region.referenceName).map(r => region.copy(referenceName = r)) + } + + /** + * Gets predicate reference name options for chromosome name based on searchable region. For example, "chr20" + * should also search "20", and "20" should also trigger the search of "chr20". + * + * @param referenceName referenceName + * @return Array of of referencenames, with and without the "chr" prefix */ - def getContigPredicate(region: ReferenceRegion): Tuple2[ReferenceRegion, ReferenceRegion] = { - if (region.referenceName.startsWith("chr")) { - val modifiedRegion = ReferenceRegion(region.referenceName.drop(3), region.start, region.end, region.strand) - Tuple2(region, modifiedRegion) + def getContigPredicate(referenceName: String): Array[String] = { + if (referenceName.startsWith("chr")) { + Array(referenceName, referenceName.drop(3)) } else { - val modifiedRegion = ReferenceRegion(("chr").concat(region.referenceName), region.start, region.end, region.strand) - Tuple2(region, modifiedRegion) + Array(referenceName, ("chr").concat(referenceName)) } } } diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/models/Materializer.scala b/mango-core/src/main/scala/org/bdgenomics/mango/models/Materializer.scala new file mode 100644 index 000000000..25a5bd61d --- /dev/null +++ b/mango-core/src/main/scala/org/bdgenomics/mango/models/Materializer.scala @@ -0,0 +1,85 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.mango.models + +case class Materializer(objects: Seq[LazyMaterialization[_, _]]) { + + /** + * Access arbitrary structure by name + * + * @param name Name to reference Materialization structure by + * @return Optional LazyMaterialization Structure + */ + def get(name: String): Option[LazyMaterialization[_, _]] = { + objects.find(r => r.getName == name) + } + + /** + * Access functions for materializer + */ + def getReads(): Option[AlignmentRecordMaterialization] = { + val x = objects.flatMap(r => + r match { + case m: AlignmentRecordMaterialization => Some(m) + case _ => None + }) + if (x.isEmpty) None + else Some(x.head) + } + + def getCoverage(): Option[CoverageMaterialization] = { + val x = objects.flatMap(r => + r match { + case m: CoverageMaterialization => Some(m) + case _ => None + }) + if (x.isEmpty) None + else Some(x.head) + } + + def getVariantContext(): Option[VariantContextMaterialization] = { + val x = objects.flatMap(r => + r match { + case m: VariantContextMaterialization => Some(m) + case _ => None + }) + if (x.isEmpty) None + else Some(x.head) + } + + def getFeatures(): Option[FeatureMaterialization] = { + val x = objects.flatMap(r => + r match { + case m: FeatureMaterialization => Some(m) + case _ => None + }) + if (x.isEmpty) None + else Some(x.head) + } + + /** + * definitions tracking whether optional datatypes were loaded + */ + def readsExist: Boolean = getReads().isDefined + + def coveragesExist: Boolean = getCoverage().isDefined + + def variantContextExist: Boolean = getVariantContext().isDefined + + def featuresExist: Boolean = getFeatures().isDefined +} diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/models/VariantContextMaterialization.scala b/mango-core/src/main/scala/org/bdgenomics/mango/models/VariantContextMaterialization.scala index 3f1d6b72e..b4aa9f4f2 100644 --- a/mango-core/src/main/scala/org/bdgenomics/mango/models/VariantContextMaterialization.scala +++ b/mango-core/src/main/scala/org/bdgenomics/mango/models/VariantContextMaterialization.scala @@ -20,8 +20,6 @@ package org.bdgenomics.mango.models import java.io.{ PrintWriter, StringWriter } import net.liftweb.json.Serialization.write -import org.apache.parquet.filter2.dsl.Dsl._ -import org.apache.parquet.filter2.predicate.FilterPredicate import org.apache.spark._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary } @@ -29,6 +27,7 @@ import org.bdgenomics.adam.projections.{ Projection, VariantField } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD } import org.bdgenomics.formats.avro.{ Variant, GenotypeAllele } +import org.bdgenomics.mango.core.util.ResourceUtils import org.bdgenomics.mango.layout.GenotypeJson /* @@ -38,11 +37,10 @@ import org.bdgenomics.mango.layout.GenotypeJson class VariantContextMaterialization(@transient sc: SparkContext, files: List[String], sd: SequenceDictionary, - prefetchSize: Option[Int] = None) - extends LazyMaterialization[GenotypeJson]("VariantContextRDD", sc, files, sd, prefetchSize) + prefetchSize: Option[Long] = None) + extends LazyMaterialization[GenotypeJson, GenotypeJson](VariantContextMaterialization.name, sc, files, sd, prefetchSize) with Serializable { - @transient implicit val formats = net.liftweb.json.DefaultFormats // placeholder used for ref/alt positions to display in browser val variantPlaceholder = "N" @@ -58,8 +56,8 @@ class VariantContextMaterialization(@transient sc: SparkContext, * * @return Generic RDD 
of data types from file */ - def load = (file: String, region: Option[ReferenceRegion]) => - VariantContextMaterialization.toGenotypeJsonRDD(VariantContextMaterialization.load(sc, file, region)) + def load = (file: String, regions: Option[Iterable[ReferenceRegion]]) => + VariantContextMaterialization.toGenotypeJsonRDD(VariantContextMaterialization.load(sc, file, regions)) /** * Reset ReferenceName for Variant @@ -78,32 +76,32 @@ class VariantContextMaterialization(@transient sc: SparkContext, * @return Map of (key, json) for the ReferenceRegion specified * N */ - def stringify(data: RDD[(String, GenotypeJson)]): Map[String, String] = { - - val flattened: Map[String, Array[String]] = data - .collect - .groupBy(_._1).map(r => (r._1, r._2.map(_._2.toString()))) + def toJson(data: RDD[(String, GenotypeJson)]): Map[String, Array[GenotypeJson]] = { + data.collect + .groupBy(_._1).map(r => (r._1, r._2.map(_._2))) + } - // write variants to json - flattened.mapValues(write(_)) + override def stringify(data: Array[GenotypeJson]): String = { + write(data.map(_.toString)) } /** * Formats raw data from RDD to JSON. * * @param region Region to obtain coverage for + * @param verbose For VariantContext, determines whether genotypes are fetched * @param binning Tells what granularity of coverage to return. Used for large regions * @return JSONified data map; */ - def getJson(region: ReferenceRegion, - showGenotypes: Boolean, - binning: Int = 1): Map[String, String] = { - val data: RDD[(String, GenotypeJson)] = get(region) + override def getJson(region: ReferenceRegion, + verbose: Boolean = true, + binning: Int = 1): Map[String, Array[GenotypeJson]] = { + val data: RDD[(String, GenotypeJson)] = get(Some(region)) val binnedData: RDD[(String, GenotypeJson)] = if (binning <= 1) { - if (!showGenotypes) - data.map(r => (r._1, GenotypeJson(r._2.variant, null))) + if (!verbose) + data.map(r => (r._1, GenotypeJson(r._2.variant))) else data } else { bin(data, binning) @@ -119,7 +117,7 @@ class VariantContextMaterialization(@transient sc: SparkContext, (r._1._1, GenotypeJson(binned)) }) } - stringify(binnedData) + toJson(binnedData) } /** @@ -138,20 +136,22 @@ class VariantContextMaterialization(@transient sc: SparkContext, */ object VariantContextMaterialization { + val name = "VariantContext" + /** * Loads variant data from adam and vcf files into a VariantContextRDD * * @param sc SparkContext * @param fp filePath to load - * @param region Region to predicate load + * @param regions Iterable of ReferenceRegions to predicate load * @return VariantContextRDD */ - def load(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): VariantContextRDD = { + def load(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): VariantContextRDD = { if (fp.endsWith(".adam")) { - loadAdam(sc, fp, region) + loadAdam(sc, fp, regions) } else { try { - loadVariantContext(sc, fp, region) + loadVariantContext(sc, fp, regions) } catch { case e: Exception => { val sw = new StringWriter @@ -167,15 +167,16 @@ object VariantContextMaterialization { * * @param sc SparkContext * @param fp filePath to vcf file - * @param region Region to predicate load + * @param regions Iterable of ReferencesRegion to predicate load * @return VariantContextRDD */ - def loadVariantContext(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): VariantContextRDD = { - region match { - case Some(_) => - val regions = LazyMaterialization.getContigPredicate(region.get) - sc.loadIndexedVcf(fp, Iterable(regions._1, regions._2)) - 
case None => sc.loadVcf(fp) + def loadVariantContext(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): VariantContextRDD = { + if (regions.isDefined) { + val predicateRegions: Iterable[ReferenceRegion] = regions.get + .flatMap(r => LazyMaterialization.getContigPredicate(r)) + sc.loadIndexedVcf(fp, predicateRegions) + } else { + sc.loadVcf(fp) } } @@ -184,19 +185,18 @@ object VariantContextMaterialization { * * @param sc SparkContext * @param fp filePath to load variants from - * @param region Region to predicate load + * @param regions Iterable of ReferenceRegions to predicate load * @return VariantContextRDD */ - def loadAdam(sc: SparkContext, fp: String, region: Option[ReferenceRegion]): VariantContextRDD = { - val pred: Option[FilterPredicate] = - region match { - case Some(_) => - val contigs = LazyMaterialization.getContigPredicate(region.get) - val contigPredicate = (BinaryColumn("variant.contig.contigName") === contigs._1.referenceName - || BinaryColumn("variant.contig.contigName") === contigs._2.referenceName) - Some((LongColumn("variant.end") >= region.get.start) && (LongColumn("variant.start") <= region.get.end) && contigPredicate) - case None => None + def loadAdam(sc: SparkContext, fp: String, regions: Option[Iterable[ReferenceRegion]]): VariantContextRDD = { + val pred = + if (regions.isDefined) { + val prefixRegions: Iterable[ReferenceRegion] = regions.get.map(r => LazyMaterialization.getContigPredicate(r)).flatten + Some(ResourceUtils.formReferenceRegionPredicate(prefixRegions)) + } else { + None } + val proj = Projection(VariantField.contigName, VariantField.start, VariantField.referenceAllele, VariantField.alternateAllele, VariantField.end) sc.loadParquetGenotypes(fp, predicate = pred, projection = Some(proj)).toVariantContextRDD } diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/util/Bookkeep.scala b/mango-core/src/main/scala/org/bdgenomics/mango/util/Bookkeep.scala index 808c9caf0..5630b7b61 100644 --- a/mango-core/src/main/scala/org/bdgenomics/mango/util/Bookkeep.scala +++ b/mango-core/src/main/scala/org/bdgenomics/mango/util/Bookkeep.scala @@ -17,12 +17,11 @@ */ package org.bdgenomics.mango.util -import org.bdgenomics.adam.models.ReferenceRegion +import org.bdgenomics.adam.models.{ SequenceDictionary, ReferenceRegion } import org.bdgenomics.utils.interval.array.{ IntervalArray, ConcreteIntervalArray } import org.bdgenomics.utils.misc.Logging import scala.collection.mutable import scala.collection.mutable.{ HashMap, ListBuffer } -import scala.reflect.ClassTag /** * Bookkeep keeps track of what chunks of data have been loaded into memory. This is @@ -30,7 +29,7 @@ import scala.reflect.ClassTag * which stores the regions that have been loaded for each id (which is a string) * @param chunkSize Chunk size is the size at which data is loaded into memory */ -class Bookkeep(chunkSize: Int) extends Serializable with Logging { +class Bookkeep(chunkSize: Long) extends Serializable with Logging { /* * The maximum width across all intervals in this bookkeeping structure. 
@@ -40,7 +39,7 @@ class Bookkeep(chunkSize: Int) extends Serializable with Logging { /* * Holds hash of ReferenceName pointing to IntervalTree of (Region, ID) */ - var bookkeep: IntervalArray[ReferenceRegion, List[String]] = new ConcreteIntervalArray(Array.empty[(ReferenceRegion, List[String])], maxIntervalWidth) + var intArray: IntervalArray[ReferenceRegion, List[String]] = new ConcreteIntervalArray(Array.empty[(ReferenceRegion, List[String])], maxIntervalWidth) /** * Keeps track of ordering of most recently viewed chromosomes @@ -49,13 +48,18 @@ class Bookkeep(chunkSize: Int) extends Serializable with Logging { def rememberValues(region: ReferenceRegion, k: String): Unit = rememberValues(region, List(k)) + def rememberValues(sd: SequenceDictionary, ks: List[String]): Unit = { + sd.records.map(r => ReferenceRegion(r.name, 0, Bookkeep.roundDown(r.length + chunkSize - 1, chunkSize))) + .foreach(r => rememberValues(r, ks)) + } + /** * Drops all values from a given sequence record */ def dropValues(): String = { try { val droppedChr = queue.dequeue() - bookkeep = bookkeep.filter(_._1.referenceName != droppedChr) + intArray = intArray.filter(_._1.referenceName != droppedChr) droppedChr } catch { case e: NoSuchElementException => @@ -73,54 +77,57 @@ class Bookkeep(chunkSize: Int) extends Serializable with Logging { def rememberValues(region: ReferenceRegion, ks: List[String]): Unit = { if (!queue.contains(region.referenceName)) queue.enqueue(region.referenceName) - bookkeep = bookkeep.insert(Iterator((region, ks))) + intArray = intArray.insert(Iterator((region, ks))) } /** - * generates a list of reference regions that were not found in bookkeeping structure + * Generates a list of reference regions that were not found in bookkeeping structure. * * @param region that is divided into chunks and searched for in bookkeeping structure * @param ks in which region is searched over. 
these are sample IDs * @return List of reference regions not found in bookkeeping structure */ - def getMissingRegions(region: ReferenceRegion, ks: List[String]): Option[List[ReferenceRegion]] = { - var regions: ListBuffer[ReferenceRegion] = new ListBuffer[ReferenceRegion]() - var start = region.start / chunkSize * chunkSize - val chunkEnd = region.end / chunkSize * chunkSize + chunkSize - var end = start + chunkSize - - while (end <= chunkEnd) { - val r = new ReferenceRegion(region.referenceName, start, end) - val size = { - try { - bookkeep.get(r).size - } catch { - case ex: NoSuchElementException => 0 - } - } - if (size < ks.size) { - regions += r - } - start += chunkSize - end += chunkSize - } - - if (regions.isEmpty) { - None - } else { - Some(Bookkeep.mergeRegions(regions.toList)) - } - + def getMissingRegions(region: ReferenceRegion, ks: List[String]): List[ReferenceRegion] = { + // get number of chunks in region + val roundedStart = Bookkeep.roundDown(region.start, chunkSize) + val roundedEnd = Bookkeep.roundDown(region.end + chunkSize - 1, chunkSize) + + val chunkCount = Math.max(1, ((roundedEnd - roundedStart) / chunkSize).toInt) + + // get chunked reference regions + val chunks = (0 until chunkCount) + .map(c => { + val start = c * chunkSize + roundedStart + val end = start + chunkSize + region.copy(start = start, end = end) + }) + + // filter out chunks that are covered by intArray + val missing = chunks.filter(c => intArray.filter(elem => elem._1.contains(c)).length == 0) + Bookkeep.mergeRegions(missing.toList) } } object Bookkeep { /** - * generates a list of closely overlapping regions, counting for gaps in the list + * Rounds a number down to the nearest multiple of 'rounded' + * + * @param number number to round + * @param rounded number rounded to + * @return new rounded number + */ + + def roundDown(number: Long, rounded: Long): Long = { + number - (number % rounded) + } + + /** + * Merges together overlapping ReferenceRegions in a list of ReferenceRegions.
* * @note For example, given a list of regions with ranges (0, 999), (1000, 1999) and (3000, 3999) * This function will consolidate adjacent regions and output (0, 1999), (3000, 3999) + * * @param regions list of regions to merge * @return Option of list of merged adjacent regions */ diff --git a/mango-core/src/main/scala/org/bdgenomics/mango/util/ResourceUtils.scala b/mango-core/src/main/scala/org/bdgenomics/mango/util/ResourceUtils.scala index fcf026537..743f46334 100644 --- a/mango-core/src/main/scala/org/bdgenomics/mango/util/ResourceUtils.scala +++ b/mango-core/src/main/scala/org/bdgenomics/mango/util/ResourceUtils.scala @@ -20,10 +20,27 @@ package org.bdgenomics.mango.core.util import java.io._ import org.apache.hadoop.fs.{ FileSystem, Path } +import org.apache.parquet.filter2.dsl.Dsl._ +import org.apache.parquet.filter2.predicate.FilterPredicate +import org.apache.parquet.io.api.Binary import org.apache.spark.SparkContext +import org.bdgenomics.adam.models.ReferenceRegion object ResourceUtils { + /** + * Builds a parquet predicate from an Iterable of ReferenceRegions + * + * @param regions ReferenceRegions to construct predicate from + * @return FilterPredicate + */ + def formReferenceRegionPredicate(regions: Iterable[ReferenceRegion]): FilterPredicate = { + regions.map(r => { + ((LongColumn("end") >= r.start) && (LongColumn("start") <= r.end) && + (BinaryColumn("contigName") === Binary.fromString(r.referenceName))) + }).reduce(_ || _) + } + /** * Prints java heap map availability and usage */ diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/filters/DiscoverySuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/filters/DiscoverySuite.scala index 76c4ac927..7a760507e 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/filters/DiscoverySuite.scala +++ b/mango-core/src/test/scala/org/bdgenomics/mango/filters/DiscoverySuite.scala @@ -30,7 +30,7 @@ class DiscoverySuite extends MangoFunSuite { val sd = new SequenceDictionary(Vector(SequenceRecord("chrM", 1000000))) // length to ensure 1000 size windows val discovery = new Discovery(sd) - val features = FeatureMaterialization.load(sc, None, bedFile).rdd.map(r => ReferenceRegion.unstranded(r)) + val features = FeatureMaterialization.load(sc, bedFile, None).rdd.map(r => ReferenceRegion.unstranded(r)) val mergedRegions = discovery.getFrequencies(features) assert(mergedRegions.length == 3) @@ -41,7 +41,7 @@ class DiscoverySuite extends MangoFunSuite { val sd = new SequenceDictionary(Vector(SequenceRecord("chrN", 1000000))) // length to ensure 1000 size windows val discovery = new Discovery(sd) - val features = FeatureMaterialization.load(sc, None, bedFile).rdd.map(r => ReferenceRegion.unstranded(r)) + val features = FeatureMaterialization.load(sc, bedFile, None).rdd.map(r => ReferenceRegion.unstranded(r)) val mergedRegions = discovery.getFrequencies(features) assert(mergedRegions.length == 0) diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/models/AlignmentRecordMaterializationSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/models/AlignmentRecordMaterializationSuite.scala index 6f3aca675..064897c4a 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/models/AlignmentRecordMaterializationSuite.scala +++ b/mango-core/src/test/scala/org/bdgenomics/mango/models/AlignmentRecordMaterializationSuite.scala @@ -44,19 +44,19 @@ class AlignmentRecordMaterializationSuite extends MangoFunSuite { val files = List(bamFile) sparkTest("create new AlignmentRecordMaterialization") { - val lazyMat = 
AlignmentRecordMaterialization(sc, files, dict) + val lazyMat = new AlignmentRecordMaterialization(sc, files, dict) } sparkTest("return raw data from AlignmentRecordMaterialization") { - val data = AlignmentRecordMaterialization(sc, files, dict) + val data = new AlignmentRecordMaterialization(sc, files, dict) val region = new ReferenceRegion("chrM", 0L, 900L) val results = data.getJson(region).get(key).get } sparkTest("return coverage from AlignmentRecordMaterialization") { - val data = AlignmentRecordMaterialization(sc, files, dict) + val data = new AlignmentRecordMaterialization(sc, files, dict) val region = new ReferenceRegion("chrM", 0L, 20L) val freq = data.getCoverage(region).get(key).get val coverage = parse(freq).extract[Array[PositionCount]] @@ -65,17 +65,25 @@ class AlignmentRecordMaterializationSuite extends MangoFunSuite { } sparkTest("return coverage overlapping multiple materialized nodes") { - val data = AlignmentRecordMaterialization(sc, files, dict) + val data = new AlignmentRecordMaterialization(sc, files, dict) val region = new ReferenceRegion("chrM", 90L, 110L) val freq = data.getCoverage(region).get(key).get val coverage = parse(freq).extract[Array[PositionCount]].sortBy(_.start) assert(coverage.length == region.length()) } + sparkTest("fetches multiple regions from load") { + val regions = Some(Iterable(ReferenceRegion("chrM", 90L, 110L), ReferenceRegion("chrM", 10100L, 10300L))) + val data1 = AlignmentRecordMaterialization.load(sc, bamFile, Some(Iterable(ReferenceRegion("chrM", 90L, 110L)))) + val data2 = AlignmentRecordMaterialization.load(sc, bamFile, Some(Iterable(ReferenceRegion("chrM", 10100L, 10300L)))) + val data = AlignmentRecordMaterialization.load(sc, bamFile, regions) + assert(data.rdd.count == data1.rdd.count + data2.rdd.count) + } + sparkTest("Should handle chromosomes with different prefixes") { val dict = new SequenceDictionary(Vector(SequenceRecord("M", 16699L))) - val data = AlignmentRecordMaterialization(sc, files, dict) + val data = new AlignmentRecordMaterialization(sc, files, dict) val region = new ReferenceRegion("M", 90L, 110L) val freq = data.getCoverage(region).get(key).get val coverage = parse(freq).extract[Array[PositionCount]].sortBy(_.start) diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/models/AnnotationMaterializationSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/models/AnnotationMaterializationSuite.scala index 646aaa86a..241a19a7e 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/models/AnnotationMaterializationSuite.scala +++ b/mango-core/src/test/scala/org/bdgenomics/mango/models/AnnotationMaterializationSuite.scala @@ -21,7 +21,6 @@ package org.bdgenomics.mango.models import net.liftweb.json._ import org.bdgenomics.adam.models.ReferenceRegion import org.bdgenomics.mango.util.MangoFunSuite -import net.liftweb.json.Serialization._ class AnnotationMaterializationSuite extends MangoFunSuite { diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/models/CoverageMaterializationSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/models/CoverageMaterializationSuite.scala index 695c1015e..a547606f8 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/models/CoverageMaterializationSuite.scala +++ b/mango-core/src/test/scala/org/bdgenomics/mango/models/CoverageMaterializationSuite.scala @@ -22,7 +22,6 @@ import net.liftweb.json._ import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary, SequenceRecord } import org.bdgenomics.mango.layout.PositionCount import 
org.bdgenomics.mango.util.MangoFunSuite -import org.bdgenomics.adam.rdd.ADAMContext._ class CoverageMaterializationSuite extends MangoFunSuite { @@ -37,42 +36,54 @@ class CoverageMaterializationSuite extends MangoFunSuite { val files = List(coverageFile) sparkTest("create new CoverageRecordMaterialization") { - val lazyMat = CoverageMaterialization(sc, files, dict) + val lazyMat = new CoverageMaterialization(sc, files, dict) } sparkTest("return coverage from CoverageRecordMaterialization") { - val data = CoverageMaterialization(sc, files, dict) + val data = new CoverageMaterialization(sc, files, dict) val region = new ReferenceRegion("chrM", 0L, 20L) - val freq = data.getCoverage(region).get(key).get + val coverage = data.getJson(region).get(key).get + assert(coverage.length == region.length()) + } + + sparkTest("can parse coverage json") { + val data = new CoverageMaterialization(sc, files, dict) + val region = new ReferenceRegion("chrM", 0L, 20L) + val freq = data.stringify(data.getJson(region).get(key).get) val coverage = parse(freq).extract[Array[PositionCount]] assert(coverage.length == region.length()) } sparkTest("return sampled coverage from CoverageRecordMaterialization over large regions") { val binning = 10 - val data = CoverageMaterialization(sc, files, dict) + val data = new CoverageMaterialization(sc, files, dict) val region = new ReferenceRegion("chrM", 0L, 200L) - val freq = data.getCoverage(region, binning).get(key).get - val coverage = parse(freq).extract[Array[PositionCount]] + val coverage = data.getJson(region, binning = binning).get(key).get assert(coverage.length == region.length() / binning) } sparkTest("return coverage overlapping multiple materialized nodes") { - val data = CoverageMaterialization(sc, files, dict) - val region = new ReferenceRegion("chrM", 90L, 110L) - val freq = data.getCoverage(region).get(key).get - val coverage = parse(freq).extract[Array[PositionCount]].sortBy(_.start) + val data = new CoverageMaterialization(sc, files, dict) + val region = ReferenceRegion("chrM", 90L, 110L) + val coverage = data.getJson(region).get(key).get assert(coverage.length == region.length()) } sparkTest("Should handle chromosomes with different prefixes") { val dict = new SequenceDictionary(Vector(SequenceRecord("M", 16699L))) - val data = CoverageMaterialization(sc, files, dict) + val data = new CoverageMaterialization(sc, files, dict) val region = new ReferenceRegion("M", 90L, 110L) - val freq = data.getCoverage(region).get(key).get - val coverage = parse(freq).extract[Array[PositionCount]].sortBy(_.start) + val coverage = data.getJson(region).get(key).get assert(coverage.length == region.length()) } + sparkTest("fetches multiple regions from load") { + val regions = Some(Iterable(ReferenceRegion("chrM", 90L, 110L), ReferenceRegion("chrM", 10100L, 10300L))) + val data1 = CoverageMaterialization.load(sc, coverageFile, Some(Iterable(ReferenceRegion("chrM", 90L, 110L)))) + val data2 = CoverageMaterialization.load(sc, coverageFile, Some(Iterable(ReferenceRegion("chrM", 10100L, 10300L)))) + val data = CoverageMaterialization.load(sc, coverageFile, regions) + assert(data.rdd.count == data1.rdd.count + data2.rdd.count) + } + } diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/models/FeatureMaterializationSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/models/FeatureMaterializationSuite.scala index 0798fda96..32d70dfe5 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/models/FeatureMaterializationSuite.scala +++ 
b/mango-core/src/test/scala/org/bdgenomics/mango/models/FeatureMaterializationSuite.scala @@ -43,7 +43,7 @@ class FeatureMaterializationSuite extends MangoFunSuite { val region = new ReferenceRegion("chrM", 1000L, 1200L) val json = data.getJson(region) - assert(json.contains(key)) + assert(json.contains(key) && json(key).length == 2) } sparkTest("can fetch multiple files") { @@ -53,7 +53,7 @@ class FeatureMaterializationSuite extends MangoFunSuite { assert(json.contains(key) && json.contains(key2)) - val keyData = parse(json.get(key).get).extract[Array[BedRowJson]] + val keyData = parse(data.stringify(json.get(key).get)).extract[Array[BedRowJson]] .sortBy(_.start) assert(keyData.length == 2) @@ -68,7 +68,8 @@ class FeatureMaterializationSuite extends MangoFunSuite { val json = data.getJson(region) assert(json.contains(key) && json.contains(key2)) - + assert(json(key).length == 2) + assert(json(key2).length == 2) } sparkTest("Bins features over large ranges") { @@ -77,10 +78,20 @@ class FeatureMaterializationSuite extends MangoFunSuite { val data = new FeatureMaterialization(sc, List(bedFile, bedFile2), dict) val region = new ReferenceRegion("M", 1000L, 1200L) val json = data.getJson(region, binning = 200) - val keyData = parse(json.get(key).get).extract[Array[BedRowJson]] + val keyData = parse(data.stringify(json.get(key).get)).extract[Array[BedRowJson]] assert(keyData.length == 1) assert(keyData.head.start == 1000) assert(keyData.head.stop == 1210) // should extend longest feature in bin } + sparkTest("fetches multiple regions from load") { + val region1 = ReferenceRegion("chrM", 100L, 200L) + val region2 = ReferenceRegion("chrM", 3000L, 3100L) + val regions = Some(Iterable(region1, region2)) + val data1 = FeatureMaterialization.load(sc, bedFile, Some(Iterable(region1))) + val data2 = FeatureMaterialization.load(sc, bedFile, Some(Iterable(region1))) + val data = FeatureMaterialization.load(sc, bedFile, regions) + assert(data.rdd.count == data1.rdd.count + data2.rdd.count) + } + } diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/models/LazyMaterializationSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/models/LazyMaterializationSuite.scala index e7f8288c6..911395897 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/models/LazyMaterializationSuite.scala +++ b/mango-core/src/test/scala/org/bdgenomics/mango/models/LazyMaterializationSuite.scala @@ -21,7 +21,7 @@ package org.bdgenomics.mango.models import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary, SequenceRecord } -import org.bdgenomics.mango.util.{ Bookkeep, MangoFunSuite } +import org.bdgenomics.mango.util.MangoFunSuite class LazyMaterializationSuite extends MangoFunSuite { @@ -57,10 +57,10 @@ class LazyMaterializationSuite extends MangoFunSuite { sparkTest("Should check and clear memory") { val lazyDummy = new LazyDummy(sc, List("FakeFile"), sd) lazyDummy.setMemoryFraction(0.0000001) // this is a very low test value - lazyDummy.get(ReferenceRegion("chrM", 0, 10L)).count + lazyDummy.get(Some(ReferenceRegion("chrM", 0, 10L))).count assert(lazyDummy.bookkeep.queue.contains("chrM")) - lazyDummy.get(ReferenceRegion("20", 0, 10L)).count + lazyDummy.get(Some(ReferenceRegion("20", 0, 10L))).count // these calls should have removed chrM from cache assert(!lazyDummy.bookkeep.queue.contains("chrM")) @@ -76,27 +76,22 @@ class LazyMaterializationSuite extends MangoFunSuite { */ class LazyDummy(@transient sc: SparkContext, files: 
List[String], - sd: SequenceDictionary) extends LazyMaterialization[ReferenceRegion]("TestRDD", sc, files, sd, Some(100)) with Serializable { - @transient implicit val formats = net.liftweb.json.DefaultFormats + sd: SequenceDictionary) extends LazyMaterialization[ReferenceRegion, ReferenceRegion]("TestRDD", sc, files, sd, Some(100L)) with Serializable { def getReferenceRegion = (r: ReferenceRegion) => r - def load = (file: String, region: Option[ReferenceRegion]) => { - sc.parallelize(Array.range(region.get.start.toInt, region.get.end.toInt) - .map(r => ReferenceRegion(region.get.referenceName, r, r + 1))) + def load = (file: String, regions: Option[Iterable[ReferenceRegion]]) => { + val region = regions.get.head + sc.parallelize(Array.range(region.start.toInt, region.end.toInt) + .map(r => ReferenceRegion(region.referenceName, r, r + 1))) } def setContigName = (r: ReferenceRegion, contig: String) => { ReferenceRegion(contig, r.start, r.end) r } - - def stringify(data: RDD[(String, ReferenceRegion)]): Map[String, String] = { - data - .collect - .groupBy(_._1) - .map(r => (r._1, r._2.map(_._2))) - .mapValues(r => r.map(f => f.toString).mkString(",")) + def toJson(data: RDD[(String, ReferenceRegion)]): Map[String, Array[ReferenceRegion]] = { + data.collect.groupBy(_._1).map(r => (r._1, r._2.map(_._2))) } -} +} diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/models/MaterializerSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/models/MaterializerSuite.scala new file mode 100644 index 000000000..4c5f20d50 --- /dev/null +++ b/mango-core/src/test/scala/org/bdgenomics/mango/models/MaterializerSuite.scala @@ -0,0 +1,64 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.mango.models + +import org.bdgenomics.adam.models.{ SequenceRecord, SequenceDictionary } +import org.bdgenomics.mango.util.MangoFunSuite + +class MaterializerSuite extends MangoFunSuite { + + // test alignment data + val bamFile = resourcePath("mouse_chrM.bam") + val dict = new SequenceDictionary(Vector(SequenceRecord("chrM", 16699L))) + + sparkTest("verifies objects exist") { + val alignmentMat = new AlignmentRecordMaterialization(sc, List(bamFile), dict) + val coverageMat = new CoverageMaterialization(sc, List(bamFile), dict) + + val mat = Materializer(Seq(alignmentMat, coverageMat)) + + assert(mat.readsExist) + assert(mat.coveragesExist) + assert(!mat.featuresExist) + assert(!mat.variantContextExist) + } + + sparkTest("Can fetch materialization objects") { + val alignmentMat = new AlignmentRecordMaterialization(sc, List(bamFile), dict) + val coverageMat = new CoverageMaterialization(sc, List(bamFile), dict) + + val mat = Materializer(Seq(alignmentMat, coverageMat)) + + assert(mat.getReads.isDefined) + assert(mat.getCoverage.isDefined) + assert(!mat.getFeatures.isDefined) + assert(!mat.getVariantContext.isDefined) + } + + sparkTest("can fetch arbitrary object through get") { + val alignmentMat = new AlignmentRecordMaterialization(sc, List(bamFile), dict) + val coverageMat = new CoverageMaterialization(sc, List(bamFile), dict) + + val mat = Materializer(Seq(alignmentMat, coverageMat)) + + assert(mat.get(AlignmentRecordMaterialization.name).isDefined) + assert(mat.get(CoverageMaterialization.name).isDefined) + assert(!mat.get(FeatureMaterialization.name).isDefined) + } + +} \ No newline at end of file diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/models/VariantContextMaterializationSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/models/VariantContextMaterializationSuite.scala index e915fa3ae..2c796b67f 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/models/VariantContextMaterializationSuite.scala +++ b/mango-core/src/test/scala/org/bdgenomics/mango/models/VariantContextMaterializationSuite.scala @@ -47,7 +47,19 @@ class VariantContextMaterializationSuite extends MangoFunSuite { val data = new VariantContextMaterialization(sc, List(vcfFile1), sd) val json = data.getJson(region).get(key).get - val vAndg = parse(json).extract[Array[String]].map(r => GenotypeJson(r)) + val vAndg = json.sortBy(_.variant.getStart) + + assert(vAndg.length == 3) + assert(vAndg.head.sampleIds.length == 2) + + } + + sparkTest("Can extract json") { + val region = new ReferenceRegion("chrM", 0, 999) + val data = new VariantContextMaterialization(sc, List(vcfFile1), sd) + val json = data.getJson(region).get(key).get + + val vAndg = parse(data.stringify(json)).extract[Array[String]].map(r => GenotypeJson(r)) .sortBy(_.variant.getStart) assert(vAndg.length == 3) @@ -76,8 +88,7 @@ class VariantContextMaterializationSuite extends MangoFunSuite { val data = new VariantContextMaterialization(sc, List(vcfFile2), sd) val json = data.getJson(region).get(key2).get - val vAndg = parse(json).extract[Array[String]].map(r => GenotypeJson(r)) - .sortBy(_.variant.getStart) + val vAndg = json.sortBy(_.variant.getStart) assert(vAndg.length == 7) assert(vAndg.head.sampleIds.length == 0) @@ -88,11 +99,11 @@ class VariantContextMaterializationSuite extends MangoFunSuite { val region = new ReferenceRegion("chrM", 0, 999) val data = new VariantContextMaterialization(sc, vcfFiles, sd) val json = data.getJson(region) - var vAndg = 
parse(json.get(key).get).extract[Array[String]].map(r => GenotypeJson(r)) + var vAndg = json.get(key).get assert(vAndg.length == 3) - vAndg = parse(json.get(key2).get).extract[Array[String]].map(r => GenotypeJson(r)) + vAndg = json.get(key2).get assert(vAndg.length == 7) } @@ -100,7 +111,7 @@ class VariantContextMaterializationSuite extends MangoFunSuite { sparkTest("Should bin and not return genotypes at zoomed out regions") { val region = new ReferenceRegion("chrM", 0, 999) val data = new VariantContextMaterialization(sc, List(vcfFile1), sd) - val json = data.getJson(region, true, binning = 20).get(key).get + val json = data.stringify(data.getJson(region, false, binning = 20).get(key).get) val vAndg = parse(json).extract[Array[String]].map(r => GenotypeJson(r)) .sortBy(_.variant.getStart) @@ -120,13 +131,24 @@ class VariantContextMaterializationSuite extends MangoFunSuite { val region = new ReferenceRegion("M", 0, 999) val data = new VariantContextMaterialization(sc, vcfFiles, sd) val json = data.getJson(region) - var vAndg = parse(json.get(key).get).extract[Array[String]].map(r => GenotypeJson(r)) + var vAndg = json.get(key).get assert(vAndg.length == 3) - vAndg = parse(json.get(key2).get).extract[Array[String]].map(r => GenotypeJson(r)) + vAndg = json.get(key2).get assert(vAndg.length == 7) } -} + sparkTest("fetches multiple regions from load") { + val region1 = ReferenceRegion("chrM", 10L, 30L) + val region2 = ReferenceRegion("chrM", 50L, 60L) + val regions = Some(Iterable(region1, region2)) + val data1 = VariantContextMaterialization.load(sc, vcfFile1, Some(Iterable(region1))) + val data2 = VariantContextMaterialization.load(sc, vcfFile1, Some(Iterable(region2))) + val data = VariantContextMaterialization.load(sc, vcfFile1, regions) + + assert(data.rdd.count == data1.rdd.count + data2.rdd.count) + } + +} \ No newline at end of file diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/util/BookkeepSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/util/BookkeepSuite.scala index 87b3993b0..5e243b1bc 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/util/BookkeepSuite.scala +++ b/mango-core/src/test/scala/org/bdgenomics/mango/util/BookkeepSuite.scala @@ -17,11 +17,11 @@ */ package org.bdgenomics.mango.util -import org.bdgenomics.adam.models.ReferenceRegion +import org.bdgenomics.adam.models.{ SequenceRecord, SequenceDictionary, ReferenceRegion } import org.scalatest.FunSuite class BookkeepSuite extends FunSuite { - val prefetchSize = 100 + val prefetchSize = 100L val sampleId = "id" val region1 = ReferenceRegion("chr1", 0, 100) @@ -31,8 +31,8 @@ class BookkeepSuite extends FunSuite { val bookkeep = new Bookkeep(prefetchSize) bookkeep.rememberValues(region1, sampleId) val regions = bookkeep.getMissingRegions(ReferenceRegion("chr1", 0, 850), List(sampleId)) - assert(regions.get.length == 1) - assert(regions.get.head.end == 900) + assert(regions.length == 1) + assert(regions.head.end == 900) } @@ -41,9 +41,9 @@ class BookkeepSuite extends FunSuite { bookkeep.rememberValues(region1, sampleId) bookkeep.rememberValues(region2, sampleId) val regions = bookkeep.getMissingRegions(ReferenceRegion("chr1", 0, 899), List(sampleId)) - assert(regions.get.length == 1) - assert(regions.get.head.start == 100) - assert(regions.get.head.end == 500) + assert(regions.length == 1) + assert(regions.head.start == 100) + assert(regions.head.end == 500) } @@ -54,9 +54,9 @@ class BookkeepSuite extends FunSuite { val newRegion = ReferenceRegion("chr2", 0, 100) 
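    // Note: regions appear to be bookkept per chromosome, so with only chr2:0-100
    // remembered here, the query for chr2:0-900 below should report a single missing
    // region spanning chr2:100-900 (matching the asserts that follow).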
bookkeep.rememberValues(newRegion, sampleId) val regions = bookkeep.getMissingRegions(ReferenceRegion("chr2", 0, 900), List(sampleId)) - assert(regions.get.length == 1) - assert(regions.get.head.start == 100) - assert(regions.get.head.end == 1000) + assert(regions.length == 1) + assert(regions.head.start == 100) + assert(regions.head.end == 900) } test("merges regions") { @@ -67,4 +67,25 @@ class BookkeepSuite extends FunSuite { assert(merged.length == 2) } + test("saves sequence dictionary") { + val samples = List("./workfiles/mouse_chrM.bam", "./workfiles/mouse_chrM_1.bam") + val sd = new SequenceDictionary(Vector(SequenceRecord("chrM", 16299L), SequenceRecord("chr1", 16299L))) + + val bookkeep = new Bookkeep(10000L) + + bookkeep.rememberValues(sd, samples) + + val missing = bookkeep.getMissingRegions(ReferenceRegion("chrM", 0, 10000), samples) + assert(missing.length == 0) + } + + test("get and put region where region equals chunk size") { + val newRegion = ReferenceRegion("chr1", 10000, 20000) + val bookkeep = new Bookkeep(10000) + bookkeep.rememberValues(newRegion, sampleId) + + val regions = bookkeep.getMissingRegions(newRegion, List(sampleId)) + assert(regions.length == 0) + } + } diff --git a/mango-core/src/test/scala/org/bdgenomics/mango/util/MangoFunSuite.scala b/mango-core/src/test/scala/org/bdgenomics/mango/util/MangoFunSuite.scala index 8a14534ea..04487393e 100644 --- a/mango-core/src/test/scala/org/bdgenomics/mango/util/MangoFunSuite.scala +++ b/mango-core/src/test/scala/org/bdgenomics/mango/util/MangoFunSuite.scala @@ -17,15 +17,9 @@ */ package org.bdgenomics.mango.util -import java.nio.file.Files - -import com.google.common.io.Resources import org.bdgenomics.utils.misc.SparkFunSuite -import org.scalatest.FunSuiteLike - -import scala.io.Source -trait MangoFunSuite extends SparkFunSuite with FunSuiteLike { +trait MangoFunSuite extends SparkFunSuite { override val appName: String = "mango" override val properties: Map[String, String] = Map( diff --git a/mango-play/app/controllers/Application.scala b/mango-play/app/controllers/Application.scala new file mode 100644 index 000000000..0e1002bb8 --- /dev/null +++ b/mango-play/app/controllers/Application.scala @@ -0,0 +1,338 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package controllers + +import javax.inject.Inject + +import org.bdgenomics.adam.models.ReferenceRegion +import org.bdgenomics.mango.core.util.VizUtils +import org.bdgenomics.mango.layout.{ BedRowJson, GenotypeJson, PositionCount } +import org.bdgenomics.mango.models._ +import org.bdgenomics.mango.util.Bookkeep +import org.ga4gh.GAReadAlignment +import play.Logger +import play.api.Play +import play.api.Play.current +import play.api.cache._ +import play.api.libs.json._ +import play.api.mvc._ +import utils.JsonImplicits._ +import utils.{ MangoServletWrapper, VizTimers } + +import scala.collection.mutable + +class MangoApplication @Inject() (cache: CacheApi) extends Controller { + + def sequenceDictionary = Action { + Ok(Json.toJson(MangoServletWrapper.globalDict)) + } + + def index = Action { request => + // set initial referenceRegion so it is defined. pick first chromosome to view + val firstChr = MangoServletWrapper.globalDict.records.head.name + Ok(views.html.index(MangoServletWrapper.formatClickableRegions(MangoServletWrapper.prefetchedRegions))).withSession( + request.session + ("contig" -> firstChr) + ("start" -> "1") + ("end" -> "100")) + } + + def browser = Action { request => + + // if session variable for reference region is not yet set, set it to first available chromosome in sd + val contig: String = request.session.get("contig").getOrElse(MangoServletWrapper.globalDict.records.head.name) + val start = request.session.get("start").getOrElse("1").toLong + val end = request.session.get("end").getOrElse("100").toLong + + // generate file keys for front end + val readsSamples: Option[List[(String, Option[String])]] = try { + val reads = MangoServletWrapper.materializer.getReads().get.getFiles.map(r => LazyMaterialization.filterKeyFromFile(r)) + + // check if there are precomputed coverage files for reads. 
If so, send this information to the frontend + // to avoid extra coverage computation + if (MangoServletWrapper.materializer.getCoverage().isDefined) { + Some(reads.map(r => { + val coverage = MangoServletWrapper.materializer.getCoverage().get.getFiles.map(c => LazyMaterialization.filterKeyFromFile(c)) + .find(c => { + c.contains(r) + }) + (r, coverage) + })) + } else Some(reads.map((_, None))) + + } catch { + case e: Exception => None + } + + val coverageSamples = try { + val coverage = MangoServletWrapper.materializer.getCoverage().get.getFiles.map(r => LazyMaterialization.filterKeyFromFile(r)) + + // filter out coverage samples that will be displayed with reads + if (readsSamples.isDefined) { + val readsCoverage = readsSamples.get.map(_._2).flatten + Some(coverage.filter(c => !readsCoverage.contains(c))) + } else Some(coverage) + } catch { + case e: Exception => None + } + + val variantSamples = try { + if (MangoServletWrapper.showGenotypes) + Some(MangoServletWrapper.materializer.getVariantContext().get.getGenotypeSamples().map(r => (LazyMaterialization.filterKeyFromFile(r._1), r._2.mkString(",")))) + else Some(MangoServletWrapper.materializer.getVariantContext().get.getFiles.map(r => (LazyMaterialization.filterKeyFromFile(r), ""))) + } catch { + case e: Exception => None + } + + val featureSamples = try { + Some(MangoServletWrapper.materializer.getFeatures().get.getFiles.map(r => LazyMaterialization.filterKeyFromFile(r))) + } catch { + case e: Exception => None + } + + Ok(views.html.browser(contig, start, end, + MangoServletWrapper.genes, coverageSamples, readsSamples, + variantSamples, featureSamples)) + } + + def quit = Action { + val thread = new Thread { + override def run() { + try { + Logger.info("Shutting down the server") + Play.application.stop() + MangoServletWrapper.sc.stop() + System.exit(0) + Logger.info("Server has stopped") + } catch { + case e: Exception => { + Logger.error("Error when stopping Jetty server: " + e.getMessage, e) + } + } + } + } + thread.start() + Ok("quit") + } + + def features(key: String, contig: String, start: Long, end: Long, binning: Int) = Action { + VizTimers.FeatRequest.time { + if (!MangoServletWrapper.materializer.featuresExist) + NotFound + else { + // if region is in bounds of reference, return data + val dictOpt = MangoServletWrapper.globalDict(contig) + if (dictOpt.isDefined) { + val viewRegion = ReferenceRegion(contig, start, + VizUtils.getEnd(end, dictOpt)) + + val results = getFromCache[BedRowJson](viewRegion, key, FeatureMaterialization.name, Overlaps.overlapsFeature, binning = binning).toArray + + if (!results.isEmpty) { + Ok(MangoServletWrapper.materializer.getFeatures().get.stringify(results)) + } else NoContent + } else NotFound + } + } + } + + def reads(key: String, contig: String, start: Long, end: Long) = Action { + VizTimers.ReadsRequest.time { + if (!MangoServletWrapper.materializer.readsExist) { + NotFound + } else { + val dictOpt = MangoServletWrapper.globalDict(contig) + if (dictOpt.isDefined) { + val viewRegion = ReferenceRegion(contig, start, + VizUtils.getEnd(end, dictOpt)) + + val results = getFromCache[GAReadAlignment](viewRegion, key, AlignmentRecordMaterialization.name, Overlaps.overlapsRead).toArray + if (!results.isEmpty) { + // filter elements by region + Ok(MangoServletWrapper.materializer.getReads().get.stringify(results)) + } else NoContent + } else NotFound + } + } + } + + def readCoverage(key: String, contig: String, start: Long, end: Long, binning: Int) = Action { + VizTimers.ReadsRequest.time { + if 
(!MangoServletWrapper.materializer.readsExist) { + NotFound + } else { + val viewRegion = ReferenceRegion(contig, start, + VizUtils.getEnd(end, MangoServletWrapper.globalDict(contig))) + // get all coverage files that have been loaded + val coverageFiles = + if (MangoServletWrapper.materializer.coveragesExist) { + Some(MangoServletWrapper.materializer.getCoverage().get.getFiles.map(f => LazyMaterialization.filterKeyFromFile(f))) + } else None + + // check if there is a precomputed coverage file for this reads file + if (coverageFiles.isDefined && coverageFiles.get.contains(key)) { + + getCoverage(viewRegion, key, binning) + } else { + // no precomputed coverage. compute from reads + val dictOpt = MangoServletWrapper.globalDict(contig) + if (dictOpt.isDefined) { + + val results = getFromCache[GAReadAlignment](viewRegion, key, AlignmentRecordMaterialization.name, Overlaps.overlapsRead).toArray + + if (!results.isEmpty) { + // compute coverage from collected reads on the fly + val coverage = MangoServletWrapper.materializer.getReads.get.toCoverage(results, viewRegion) + Ok(MangoServletWrapper.materializer.getCoverage().get.stringify(coverage)) + } else NoContent + } else NotFound + } + } + } + } + + def coverage(key: String, contig: String, start: Long, end: Long, binning: Int) = Action { + val viewRegion = ReferenceRegion(contig, start, + VizUtils.getEnd(end, MangoServletWrapper.globalDict(contig))) + getCoverage(viewRegion, key, binning) + } + + def variants(key: String, contig: String, start: Long, end: Long, binning: Int) = Action { + VizTimers.VarRequest.time { + if (!MangoServletWrapper.materializer.variantContextExist) + NotFound + else { + val dictOpt = MangoServletWrapper.globalDict(contig) + if (dictOpt.isDefined) { + val viewRegion = ReferenceRegion(contig, start, + VizUtils.getEnd(end, dictOpt)) + + val results = getFromCache[GenotypeJson](viewRegion, key, VariantContextMaterialization.name, Overlaps.overlapsVariant, + MangoServletWrapper.showGenotypes, binning).toArray + + if (!results.isEmpty) { + // extract variants only and parse to stringified json + Ok(MangoServletWrapper.materializer.getVariantContext().get.stringify(results)) + } else NoContent + } else NotFound + } + } + } + + def reference(contig: String, start: Long, end: Long) = Action { + val viewRegion = ReferenceRegion(contig, start, end) + val dictOpt = MangoServletWrapper.globalDict(viewRegion.referenceName) + if (dictOpt.isDefined) { + Ok(MangoServletWrapper.annotationRDD.getReferenceString(viewRegion)) + } else NotFound + } + + def setContig(contig: String, start: Long, end: Long) = Action { request => + val dictOpt = MangoServletWrapper.globalDict(contig) + val viewRegion = ReferenceRegion(contig, start, + VizUtils.getEnd(end, dictOpt)) + Ok("").withSession( + ("contig" -> contig), + ("start" -> start.toString), + ("end" -> end.toString)) + } + + /** + * Gets Coverage for a get Request. This is used to get both Reads based coverage and generic coverage. 
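 * When binning is greater than 1, results appear to be downsampled to roughly one
 * PositionCount per `binning` base pairs (see the binning test in CoverageMaterializationSuite).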
+ * + * @param viewRegion ReferenceRegion to view coverage over + * @param key key for coverage file (see LazyMaterialization) + * @return ActionResult of coverage json + */ + private def getCoverage(viewRegion: ReferenceRegion, key: String, binning: Int = 1): Result = { + VizTimers.CoverageRequest.time { + if (!MangoServletWrapper.materializer.coveragesExist) { + NotFound + } else { + val dictOpt = MangoServletWrapper.globalDict(viewRegion.referenceName) + if (dictOpt.isDefined) { + + val results = getFromCache[PositionCount](viewRegion, key, CoverageMaterialization.name, Overlaps.overlapsCoverage, + binning = binning).toArray + + if (!results.isEmpty) { + Ok(MangoServletWrapper.materializer.getCoverage().get.stringify(results)) + } else NoContent + } else NotFound + } + } + } + + private def getFromCache[T](region: ReferenceRegion, + key: String, + name: String, + overlaps: (T, ReferenceRegion) => Boolean, + verbose: Boolean = true, + binning: Int = 1): mutable.ArraySeq[T] = { + + // expand region + val expandedRegions = MangoServletWrapper.expand(region) + + // get cache keys based on expanded regions + val cacheKeys = expandedRegions.map(r => (r, s"${name}_${r.toString}_${binning}")) + + // get keys that are not found in cache + val keysNotFound = cacheKeys.filter(k => { + !cache.get(k._2).isDefined + }) + + // synchronize to avoid duplicating items in cache + MangoServletWrapper.syncObject(name).synchronized { + // merge keys not in cache to reduce Spark calls + val data: Map[String, Array[T]] = Bookkeep.mergeRegions(keysNotFound.map(_._1).toList).map(k => { + MangoServletWrapper.materializer.get(name).get.getJson(k, verbose = verbose, binning = binning) + .asInstanceOf[Map[String, Array[T]]] + }).flatten.toMap + + // put missing keys back into cache, dividing data back up by cacheSize + keysNotFound.map(k => { + val filtered: Map[String, Array[T]] = data.mapValues(r => r.filter(t => overlaps(t, k._1))) + cache.set(k._2, filtered) + }) + } + + // finally, get results from cache + cacheKeys.flatMap(k => cache.get[Map[String, Array[T]]](k._2)) + .flatMap(_.get(key)) // filter by key + .flatten.distinct.filter(r => overlaps(r, region)) // remove elements not overlapping original region + + } + +} + +object Overlaps { + + val overlapsRead = (x: GAReadAlignment, r: ReferenceRegion) => { + val ghRegion = ReferenceRegion(x.getAlignment.getPosition.getReferenceName, x.getAlignment.getPosition.getPosition, + (x.getAlignment.getPosition.getPosition + 1)) + ghRegion.overlaps(r) + } + + val overlapsFeature = (x: BedRowJson, r: ReferenceRegion) => x.overlaps(r) + + val overlapsVariant = (x: GenotypeJson, r: ReferenceRegion) => { + x.overlaps(r) + } + + val overlapsCoverage = (x: PositionCount, r: ReferenceRegion) => { + x.overlaps(r) + } + +} diff --git a/mango-play/app/utils/ApplicationWrapper.scala b/mango-play/app/utils/ApplicationWrapper.scala new file mode 100644 index 000000000..5e4fef089 --- /dev/null +++ b/mango-play/app/utils/ApplicationWrapper.scala @@ -0,0 +1,385 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package utils + +import java.io.{ PrintWriter, FileNotFoundException } + +import org.apache.spark.SparkContext +import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary } +import org.bdgenomics.mango.filters._ +import org.bdgenomics.mango.models._ +import org.bdgenomics.utils.cli._ +import org.bdgenomics.utils.instrumentation.{ MetricsListener, Metrics } +import org.bdgenomics.utils.misc.Logging +import org.kohsuke.args4j.{ Argument, Option => Args4jOption } + +/** + * Timers for fetching data to be returned as Json + */ +object VizTimers extends Metrics { + //HTTP requests + val ReadsRequest = timer("GET reads") + val CoverageRequest = timer("GET coverage") + val FreqRequest = timer("GET frequency") + val VarRequest = timer("GET variants") + val VarFreqRequest = timer("Get variant frequency") + val FeatRequest = timer("GET features") + val AlignmentRequest = timer("GET alignment") + + //RDD operations + val FreqRDDTimer = timer("RDD Freq operations") + val VarRDDTimer = timer("RDD Var operations") + val FeatRDDTimer = timer("RDD Feat operations") + val RefRDDTimer = timer("RDD Ref operations") + val GetPartChunkTimer = timer("Calculate block chunk") + + //Generating Json + val MakingTrack = timer("Making Track") + val DoingCollect = timer("Doing Collect") + val PrintReferenceTimer = timer("JSON get reference string") +} + +/** + * Arguments for mango startup + */ +class ServerArgs extends Args4jBase with ParquetArgs { + @Argument(required = true, metaVar = "reference", usage = "The reference file to view, required", index = 0) + var referencePath: String = null + + @Args4jOption(required = false, name = "-genes", usage = "Gene URL.") + var genePath: String = null + + @Args4jOption(required = false, name = "-reads", usage = "A list of reads files to view, separated by commas (,)") + var readsPaths: String = null + + @Args4jOption(required = false, name = "-coverage", usage = "A list of coverage files to view, separated by commas (,)") + var coveragePaths: String = null + + @Args4jOption(required = false, name = "-variants", usage = "A list of variants files to view, separated by commas (,). " + + "Vcf files require a corresponding tbi index.") + var variantsPaths: String = null + + @Args4jOption(required = false, name = "-show_genotypes", usage = "Shows genotypes if available in variant files.") + var showGenotypes: Boolean = false + + @Args4jOption(required = false, name = "-features", usage = "The feature files to view, separated by commas (,)") + var featurePaths: String = null + + @Args4jOption(required = false, name = "-port", usage = "The port to bind to for visualization. 
The default is 8080.") + var port: Int = 8080 + + @Args4jOption(required = false, name = "-discover", usage = "This turns on discovery mode on start up.") + var discoveryMode: Boolean = false + + @Args4jOption(required = false, name = "-prefetchSize", usage = "Bp to prefetch in executors.") + var prefetchSize: Int = 10000 + + @Args4jOption(required = false, name = "-cacheSize", usage = "Bp to cache on driver.") + var cacheSize: Int = MangoServletWrapper.cacheSize + + @Args4jOption(required = false, name = "-preload", usage = "Chromosomes to prefetch, separated by commas (,).") + var preload: String = null +} + +/** + * Contains caching, error and util function information for formatting and serving Json data + */ +object MangoServletWrapper extends BDGCommandCompanion with Logging { + + val commandName: String = "Mango" + val commandDescription: String = "Genomic visualization for ADAM" + + var sc: SparkContext = null + + var materializer: Materializer = null + + var cacheSize: Int = 1000 + + var metrics: Option[MetricsListener] = None + + var globalDict: SequenceDictionary = null + + // Structures storing data types. All but reference is optional + var annotationRDD: AnnotationMaterialization = null + + // Gene URL + var genes: Option[String] = None + + var showGenotypes: Boolean = false + + /** + * Caching information for frontend + */ + // stores synchonization objects + var syncObject: Map[String, Object] = Map.empty[String, Object] + + // regions to prefetch during discovery. sent to front + // end for visual processing + var prefetchedRegions: List[(ReferenceRegion, Double)] = List() + + /** + * Prints spark metrics to System.out + */ + def printMetrics { + if (metrics.isDefined) { + // Set the output buffer size to 4KB by default + val out = new PrintWriter(System.out) + out.println("Metrics:") + out.println("") + Metrics.print(out, metrics.map(_.metrics.sparkMetrics.stageTimes)) + out.println() + metrics.foreach(_.metrics.sparkMetrics.print(out)) + out.flush() + } + } + + /** + * Populates MangoServletWrapper by using string parameters + * @param cmdLine String of parameters (ie "reference.fa -reads reads.bam -discover" + * @return MangoServletWrapper class + */ + def apply(cmdLine: String): MangoServletWrapper = { + val params = cmdLine.split("\\s").filter(r => !r.isEmpty) + this.apply(params) + } + + /** + * Populates MangoServletWrapper by using list of string parameters + * @param cmdLine List of command line parameters + * @return MangoServletWrapper class + */ + def apply(cmdLine: Array[String]): MangoServletWrapper = { + new MangoServletWrapper(Args4j[ServerArgs](cmdLine)) + } + + /** + * Returns stringified version of sequence dictionary + * + * @param regions: regions to format to string + * @return list of strinified reference regions + */ + def formatClickableRegions(regions: List[(ReferenceRegion, Double)]): String = { + regions.map(r => s"${r._1.referenceName}:${r._1.start}-${r._1.end}" + + s"-${BigDecimal(r._2).setScale(2, BigDecimal.RoundingMode.HALF_UP).toDouble}").mkString(",") + } + + /** + * Expands region to match the cach size. Region will be expanded by rounding down the start position + * to be a factor of the cache size. If region is larger than cache size, the region size will + * be a factor of the cache size. 
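 * For example (illustrative, assuming the default cache size of 1000): a request for
 * chr1:1500-2700 is expanded to the chunks chr1:1000-2000 and chr1:2000-3000.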
+ * + * @param region ReferenceRegion to expand + * @param minLength minimum length to expand the size of the region to + * @return expanded region + */ + def expand(region: ReferenceRegion, minLength: Long = MangoServletWrapper.cacheSize): Array[ReferenceRegion] = { + require(minLength > 0, s"minimum length ${minLength} must be greater than 0") + val finalStart = region.start - (region.start % minLength) + val finalEnd = Math.max(region.end, finalStart + minLength) + val blocks = ((finalEnd - finalStart) / minLength).toInt + 1 + (0 until blocks).map(r => { + val start = finalStart + (minLength * r) + val end = start + minLength + region.copy(start = start, end = end) + }).toArray + } +} + +/** + * Initializes all data types to be served to frontend. Optionally runs discovery mode. + * + * @param args ServerArgs + */ +class MangoServletWrapper(val args: ServerArgs) extends BDGSparkCommand[ServerArgs] with Logging { + val companion: BDGCommandCompanion = MangoServletWrapper + + override def run(sc: SparkContext) = { + + // Initialize Metrics + MangoServletWrapper.metrics = initializeMetrics(sc) + + // Set SparkContext + MangoServletWrapper.sc = sc + + // set driver cache size + MangoServletWrapper.cacheSize = args.cacheSize + + // initialize required annotation dataset + initAnnotations(sc) + + // check whether genePath was supplied + if (args.genePath != null) { + MangoServletWrapper.genes = Some(args.genePath) + } + + // set materializer + MangoServletWrapper.materializer = Materializer(Seq(initAlignments(sc, args.prefetchSize), initCoverages(sc, args.prefetchSize), + initVariantContext(sc, args.prefetchSize), initFeatures(sc, args.prefetchSize)).flatten) + + val preload = Option(args.preload).getOrElse("").split(',').flatMap(r => LazyMaterialization.getContigPredicate(r)) + + // run discovery mode if it is specified in the startup script + if (!preload.isEmpty) { + val preloaded = MangoServletWrapper.globalDict.records.filter(r => preload.contains(r.name)) + .map(r => ReferenceRegion(r.name, 0, r.length)) + preprocess(preloaded) + } + + // run discovery mode if it is specified in the startup script + if (args.discoveryMode) { + MangoServletWrapper.prefetchedRegions = discoverFrequencies(sc) + } + } + + /* + * Initialize required reference file + */ + def initAnnotations(sc: SparkContext) = { + val referencePath = Option(args.referencePath).getOrElse({ + throw new FileNotFoundException("reference file not provided") + }) + + MangoServletWrapper.annotationRDD = new AnnotationMaterialization(sc, referencePath) + MangoServletWrapper.globalDict = MangoServletWrapper.annotationRDD.getSequenceDictionary + } + + /* + * Initialize loaded alignment files + */ + def initAlignments(sc: SparkContext, prefetch: Int): Option[AlignmentRecordMaterialization] = { + if (Option(args.readsPaths).isDefined) { + val readsPaths = args.readsPaths.split(",").toList + if (readsPaths.nonEmpty) { + object readsWait + MangoServletWrapper.syncObject += (AlignmentRecordMaterialization.name -> readsWait) + Some(new AlignmentRecordMaterialization(sc, readsPaths, MangoServletWrapper.globalDict, Some(prefetch))) + } else None + } else None + } + + /* + * Initialize coverage files + */ + def initCoverages(sc: SparkContext, prefetch: Int): Option[CoverageMaterialization] = { + if (Option(args.coveragePaths).isDefined) { + val coveragePaths = args.coveragePaths.split(",").toList + + if (coveragePaths.nonEmpty) { + object coverageWait + MangoServletWrapper.syncObject += (CoverageMaterialization.name -> coverageWait) + Some(new 
CoverageMaterialization(sc, coveragePaths, MangoServletWrapper.globalDict, Some(prefetch))) + } else None + } else None + } + + /** + * Initialize loaded variant files + */ + def initVariantContext(sc: SparkContext, prefetch: Int): Option[VariantContextMaterialization] = { + // set flag for visualizing genotypes + MangoServletWrapper.showGenotypes = args.showGenotypes + + if (Option(args.variantsPaths).isDefined) { + val variantsPaths = args.variantsPaths.split(",").toList + + if (variantsPaths.nonEmpty) { + object variantsWait + MangoServletWrapper.syncObject += (VariantContextMaterialization.name -> variantsWait) + Some(new VariantContextMaterialization(sc, variantsPaths, MangoServletWrapper.globalDict, Some(prefetch))) + } else None + } else None + } + + /** + * Initialize loaded feature files + */ + def initFeatures(sc: SparkContext, prefetch: Int): Option[FeatureMaterialization] = { + val featurePaths = Option(args.featurePaths) + if (featurePaths.isDefined) { + val featurePaths = args.featurePaths.split(",").toList + if (featurePaths.nonEmpty) { + object featuresWait + MangoServletWrapper.syncObject += (FeatureMaterialization.name -> featuresWait) + Some(new FeatureMaterialization(sc, featurePaths, MangoServletWrapper.globalDict, Some(prefetch))) + } else None + } else None + } + + /** + * Runs total data scan over all feature, variant and coverage files, calculating the normalied frequency at all + * windows in the genome. + * + * @return Returns list of windowed regions in the genome and their corresponding normalized frequencies + */ + def discoverFrequencies(sc: SparkContext): List[(ReferenceRegion, Double)] = { + + val discovery = Discovery(MangoServletWrapper.annotationRDD.getSequenceDictionary) + var regions: List[(ReferenceRegion, Double)] = List() + + // get feature frequency + if (MangoServletWrapper.materializer.featuresExist) { + val featureRegions = MangoServletWrapper.materializer.getFeatures().get.get().map(r => ReferenceRegion.unstranded(r._2)) + regions = regions ++ discovery.getFrequencies(featureRegions) + } + + // get variant frequency + if (MangoServletWrapper.materializer.variantContextExist) { + val variantRegions = MangoServletWrapper.materializer.getVariantContext().get.get().map(r => ReferenceRegion(r._2.variant)) + regions = regions ++ discovery.getFrequencies(variantRegions) + } + + // get coverage frequency + // Note: calculating coverage frequency is an expensive operation. Only perform if sc is not local. 
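    // Illustrative note on the normalization below: per-window frequencies from all available
    // tracks are summed and divided by the maximum sum, so windows totaling 2.0 and 8.0
    // come out as 0.25 and 1.0 before zero-score windows are filtered out.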
+ if (MangoServletWrapper.materializer.coveragesExist && !sc.isLocal) { + val coverageRegions = MangoServletWrapper.materializer.getCoverage().get.get().map(r => ReferenceRegion(r._2)) + regions = regions ++ discovery.getFrequencies(coverageRegions) + } + + // group all regions together and reduce down for all data types + regions = regions.groupBy(_._1).map(r => (r._1, r._2.map(a => a._2).sum)).toList + + // normalize and filter by regions with data + val max = regions.map(_._2).reduceOption(_ max _).getOrElse(1.0) + regions.map(r => (r._1, r._2 / max)) + .filter(_._2 > 0.0) + } + + /** + * preprocesses data by loading specified regions into memory for reads, coverage, variants and features + * + * @param regions Regions to be preprocessed + */ + def preprocess(regions: Vector[ReferenceRegion]) = { + // select two of the highest occupied regions to load + // The number of selected regions is low to reduce unnecessary loading while + // jump starting Thread setup for Spark on the specific data files + + for (region <- regions) { + if (MangoServletWrapper.materializer.featuresExist) + MangoServletWrapper.materializer.getFeatures().get.get(Some(region)).count() + if (MangoServletWrapper.materializer.readsExist) + MangoServletWrapper.materializer.getReads().get.get(Some(region)).count() + if (MangoServletWrapper.materializer.coveragesExist) + MangoServletWrapper.materializer.getCoverage.get.get(Some(region)).count() + if (MangoServletWrapper.materializer.variantContextExist) + MangoServletWrapper.materializer.getVariantContext().get.get(Some(region)).count() + } + } + +} + diff --git a/mango-play/app/utils/Global.scala b/mango-play/app/utils/Global.scala new file mode 100644 index 000000000..e3af2e5bf --- /dev/null +++ b/mango-play/app/utils/Global.scala @@ -0,0 +1,97 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package utils + +import java.io._ + +import com.typesafe.config.ConfigException +import org.apache.spark.{ SparkContext, SparkConf } +import play.api._ +import org.bdgenomics.utils.misc.Logging + +object Global extends GlobalSettings with Logging { + + override def onStop(app: Application): Unit = { + // print timing metrics + MangoServletWrapper.printMetrics + } + + override def beforeStart(app: Application) = { + + // Load in Spark Home + val sparkHome = + try { + app.configuration.underlying.getString("spark.home") + } catch { + case e: ConfigException => + log.error("Spark Home not set") + System.exit(-1) + null + } + + // Load in Mango Args + val appArgs = + try { + app.configuration.underlying.getString("app.args") + } catch { + case e: ConfigException => + log.warn("No Mango Args provided") + System.exit(-1) + null + } + + // TODO: set spark args + val conf = new SparkConf(false) // skip loading external settings + .setAppName(MangoServletWrapper.commandName) + .setMaster("local[%d]".format(Runtime.getRuntime.availableProcessors())) + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .set("spark.kryo.registrator", "org.bdgenomics.mango.serialization.MangoKryoRegistrator") + .set("spark.dynamicAllocation.executorIdleTimeout", "10d") + .setSparkHome(sparkHome) + + try { + val sparkArgs = parse(app.configuration.underlying.getString("spark.args")) + // set master + val master = sparkArgs.find(_._1.contains("master")) + if (master.isDefined) { + conf.setMaster(master.get._2) + } + conf.setAll(sparkArgs.filter(!_._1.contains("master")).map(r => (r._1, r._2)).toTraversable) + } catch { + case e: ConfigException => + log.info("No Spark Args Provided.") + } + + val sc = new SparkContext(conf) + MangoServletWrapper(appArgs).run(sc) + } + + /** + * Parse a list of spark-submit command line options. + * + * See SparkSubmitArguments.scala for a more formal description of available options. + * + */ + def parse(argStr: String): Iterator[(String, String)] = { + val args = argStr.split("\\s").filter(!_.isEmpty).sliding(2, 2).toArray + // require that all spark args have value + require(args.forall(_.length == 2), "Error, Invalid spark args.") + args.map(r => (r(0), r(1))).toIterator + } + +} diff --git a/mango-play/app/utils/JsonImplicits.scala b/mango-play/app/utils/JsonImplicits.scala new file mode 100644 index 000000000..90ce7aefe --- /dev/null +++ b/mango-play/app/utils/JsonImplicits.scala @@ -0,0 +1,51 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package utils + +import org.bdgenomics.adam.models.{ SequenceRecord, SequenceDictionary } +import play.api.libs.json._ + +/** + * Holds all implicit conversion that will be used by play.api.libs.json. 
These + * are used for Read and Write to convert objects to and from play json + */ +object JsonImplicits { + + // implicit writer for SequenceDictionary + implicit val sdWrites = new Writes[SequenceDictionary] { + def writes(sd: SequenceDictionary) = { + val records = sd.records.sortBy(_.length).reverse.map(r => Json.obj( + "name" -> r.name, + "length" -> r.length)) + records.foldLeft(JsArray())((acc, x) => acc ++ Json.arr(x)) + } + } + + // implicit reader for SequenceDictionary + implicit val sdReads = new Reads[SequenceDictionary] { + def reads(json: JsValue): JsResult[SequenceDictionary] = { + val jsArr: JsArray = json.as[JsArray] + val records = jsArr.value.map(r => { + val name = (r \ "name").as[String] + val length = (r \ "length").as[Long] + SequenceRecord(name, length) + }) + JsSuccess(new SequenceDictionary(records.toVector)) + } + } +} \ No newline at end of file diff --git a/mango-play/app/views/browser.scala.html b/mango-play/app/views/browser.scala.html new file mode 100644 index 000000000..e4a070f95 --- /dev/null +++ b/mango-play/app/views/browser.scala.html @@ -0,0 +1,169 @@ +@(contig: String, start: Long, end: Long, genes: Option[String], + coverage: Option[List[String]], + reads: Option[List[(String, Option[String])]], + variants: Option[List[(String, String)]], + features: Option[List[String]], + isBrowser: Boolean = true) + + + + @head() + @navigation(isBrowser) + @sidebar() + +
+
+
+ + + + \ No newline at end of file diff --git a/mango-play/app/views/head.scala.html b/mango-play/app/views/head.scala.html new file mode 100644 index 000000000..149dfb405 --- /dev/null +++ b/mango-play/app/views/head.scala.html @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mango-cli/src/main/webapp/WEB-INF/layouts/overall.ssp b/mango-play/app/views/index.scala.html similarity index 91% rename from mango-cli/src/main/webapp/WEB-INF/layouts/overall.ssp rename to mango-play/app/views/index.scala.html index bf9435692..c0cd179f3 100644 --- a/mango-cli/src/main/webapp/WEB-INF/layouts/overall.ssp +++ b/mango-play/app/views/index.scala.html @@ -1,10 +1,9 @@ +@(regions: String, isBrowser: Boolean = false) -<%@ val dictionary: String %> -<%@ val regions: String %> -<% render("templates/head.ssp") %> -<% render("navigation/navigation.ssp", Map("isBrowser" -> false)) %> +@head() +@navigation(isBrowser)
@@ -12,7 +11,18 @@ + + \ No newline at end of file diff --git a/mango-play/conf/application.conf b/mango-play/conf/application.conf new file mode 100644 index 000000000..cee7c931f --- /dev/null +++ b/mango-play/conf/application.conf @@ -0,0 +1,24 @@ +# This is the main configuration file for the application. +# ~~~~~ + +# The application languages +# ~~~~~ +application.langs="en" + +play.crypto.secret=${?APPLICATION_SECRET} + +http.port=8080 + + +# Define the Global object class for this application. +# Default to Global in the root package. +application.global=utils.Global + +# Application specific parameters +spark.home=${?SPARK_HOME} +spark.args=${?SPARK_ARGS} + +app.args=${?MANGO_ARGS} +app.jar =${?MANGO_MAIN_JAR} +app.class="utils.MangoServletWrapper" + diff --git a/mango-play/conf/dependencies.yml b/mango-play/conf/dependencies.yml new file mode 100644 index 000000000..c106ee70f --- /dev/null +++ b/mango-play/conf/dependencies.yml @@ -0,0 +1,9 @@ +require: + - play + - provided -> spark-core_2.10 1.6.0 +repositories: + - provided: + type: local + artifact: "${application.path}/lib/[module]-[revision].jar" + contains: + - provided -> * \ No newline at end of file diff --git a/mango-play/conf/logback-test.xml b/mango-play/conf/logback-test.xml new file mode 100644 index 000000000..37ab8a86b --- /dev/null +++ b/mango-play/conf/logback-test.xml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/mango-play/conf/routes b/mango-play/conf/routes new file mode 100644 index 000000000..c42f88e10 --- /dev/null +++ b/mango-play/conf/routes @@ -0,0 +1,19 @@ +# Routes +# This file defines all application routes (Higher priority routes first) +# ~~~~ + +# Home page +GET /quit @controllers.MangoApplication.quit +GET / @controllers.MangoApplication.index +GET /browser @controllers.MangoApplication.browser +GET /reference/:contig @controllers.MangoApplication.reference(contig: String, start: Long, end: Long) +GET /features/:key/:contig @controllers.MangoApplication.features(key: String, contig: String, start: Long, end: Long, binning: Int = 1) +GET /coverage/:key/:contig @controllers.MangoApplication.coverage(key: String, contig: String, start: Long, end: Long, binning: Int = 1) +GET /reads/:key/:contig @controllers.MangoApplication.reads(key: String, contig: String, start: Long, end: Long) +GET /reads/coverage/:key/:contig @controllers.MangoApplication.readCoverage(key: String, contig: String, start: Long, end: Long, binning: Int = 1) +GET /sequenceDictionary @controllers.MangoApplication.sequenceDictionary +GET /variants/:key/:contig @controllers.MangoApplication.variants(key: String, contig: String, start: Long, end: Long, binning: Int = 1) +GET /setContig/:ref @controllers.MangoApplication.setContig(ref: String, start: Long, end: Long) + +# Map static resources from the /public folder to the /assets URL path +GET /assets/*file controllers.Assets.at(path="/public", file) diff --git a/mango-play/pom.xml b/mango-play/pom.xml new file mode 100644 index 000000000..b8d05dd37 --- /dev/null +++ b/mango-play/pom.xml @@ -0,0 +1,216 @@ + + + 4.0.0 + + + org.bdgenomics.mango + mango-parent + 0.0.1-SNAPSHOT + ../pom.xml + + + mango-play + mango-play + play2 + + + + typesafe-releases + http://repo.typesafe.com/typesafe/releases/ + + false + + + + + + + typesafe-releases-plugins + http://repo.typesafe.com/typesafe/releases/ + + false + + + + + + + org.bdgenomics.mango + mango-core + ${project.version} + + + + com.typesafe.play + play_${scala.version.prefix} + ${play2.version} + + + 
com.fasterxml.jackson.core + * + + + + + com.typesafe.play + play-test_${scala.version.prefix} + ${play2.version} + test + + + com.fasterxml.jackson + * + + + + + com.typesafe.play + play-cache_${scala.version.prefix} + ${play2.version} + + + com.fasterxml.jackson + * + + + + + com.typesafe.play + play-netty-server_${scala.version.prefix} + ${play2.version} + runtime + + + com.fasterxml.jackson + * + + + + + + + org.apache.spark + spark-core_${scala.version.prefix} + provided + + + org.bdgenomics.adam + adam-cli${spark.version.prefix}${scala.version.prefix} + compile + + + org.bdgenomics.adam + adam-core${spark.version.prefix}${scala.version.prefix} + compile + + + org.bdgenomics.adam + adam-core${spark.version.prefix}${scala.version.prefix} + test-jar + test + + + org.bdgenomics.utils + utils-cli_${scala.version.prefix} + compile + + + org.bdgenomics.utils + utils-metrics_${scala.version.prefix} + compile + + + org.bdgenomics.utils + utils-io_${scala.version.prefix} + compile + + + org.bdgenomics.utils + utils-misc_${scala.version.prefix} + test-jar + test + + + org.apache.parquet + parquet-avro + compile + + + org.apache.parquet + parquet-scala_2.10 + compile + + + org.apache.hadoop + hadoop-client + provided + + + org.scalatest + scalatest_${scala.version.prefix} + test + + + com.typesafe + config + 1.3.0 + + + + + + + + test/resources + false + + + ${basedir}/app + ${basedir}/test + + + ${basedir}/conf + + + ${basedir} + + public/** + + + + + + org.codehaus.mojo + exec-maven-plugin + + + org.scalatest + scalatest-maven-plugin + + + com.google.code.play2-maven-plugin + play2-maven-plugin + 1.0.0-beta5 + true + + + com.google.code.sbt-compiler-maven-plugin + sbt-compiler-maven-plugin + 1.0.0-beta8 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.16 + + + **/*Spec.java + **/*Test.java + + + + + + + diff --git a/mango-cli/src/main/webapp/resources/images/favicon.png b/mango-play/public/images/favicon.png similarity index 100% rename from mango-cli/src/main/webapp/resources/images/favicon.png rename to mango-play/public/images/favicon.png diff --git a/mango-cli/src/main/webapp/js/contigWheel.js b/mango-play/public/js/contigWheel.js similarity index 86% rename from mango-cli/src/main/webapp/js/contigWheel.js rename to mango-play/public/js/contigWheel.js index c10238f3f..53267fa64 100644 --- a/mango-cli/src/main/webapp/js/contigWheel.js +++ b/mango-play/public/js/contigWheel.js @@ -4,17 +4,11 @@ function refVis(dictionary, browser, fromOverall) { // Creating reference visualization from sequence dictionary - var dataset = []; var totalLength=0; - // create dataset for chromosomes - var dictionary = dictionary.split(","); + // sum up total dictionary length for (i = 0; i < dictionary.length; i++) { - var range = dictionary[i].split(":")[1].split("-"); - var length = parseInt(range[1]-range[0]); - var name = dictionary[i].split(":")[0]; - dataset.push({name: name, length: length }); - totalLength+=length; + totalLength+=dictionary[i].length; } var innerWidth = 45; @@ -36,7 +30,7 @@ function refVis(dictionary, browser, fromOverall) { .value(function(d) { return d.length/totalLength*100; }) //Express as percentage .sort(null); var path = svg.selectAll('path') - .data(pie(dataset)) + .data(pie(dictionary)) .enter() .append('path') .attr('d', arc) @@ -57,7 +51,7 @@ function refVis(dictionary, browser, fromOverall) { .attr('class', 'percent'); path.on('mouseover', function(d) { - var total = d3.sum(dataset.map(function(d) { + var total = d3.sum(dictionary.map(function(d) { return d.length; 
     }));
     var percent = Math.round(1000 * d.data.length / total) / 10; //force 1 s.f.
diff --git a/mango-cli/src/main/webapp/js/formSubmit.js b/mango-play/public/js/formSubmit.js
similarity index 100%
rename from mango-cli/src/main/webapp/js/formSubmit.js
rename to mango-play/public/js/formSubmit.js
diff --git a/mango-cli/src/main/webapp/resources/js/bigSlide.js b/mango-play/public/resources/js/bigSlide.js
similarity index 100%
rename from mango-cli/src/main/webapp/resources/js/bigSlide.js
rename to mango-play/public/resources/js/bigSlide.js
diff --git a/mango-cli/src/main/webapp/resources/js/d3.js b/mango-play/public/resources/js/d3.js
similarity index 100%
rename from mango-cli/src/main/webapp/resources/js/d3.js
rename to mango-play/public/resources/js/d3.js
diff --git a/mango-cli/src/main/webapp/resources/js/d3.v3.min.js b/mango-play/public/resources/js/d3.v3.min.js
similarity index 100%
rename from mango-cli/src/main/webapp/resources/js/d3.v3.min.js
rename to mango-play/public/resources/js/d3.v3.min.js
diff --git a/mango-cli/src/main/webapp/resources/js/jquery-ui.min.js b/mango-play/public/resources/js/jquery-ui.min.js
similarity index 100%
rename from mango-cli/src/main/webapp/resources/js/jquery-ui.min.js
rename to mango-play/public/resources/js/jquery-ui.min.js
diff --git a/mango-cli/src/main/webapp/resources/js/jquery.autocomplete.min.js b/mango-play/public/resources/js/jquery.autocomplete.min.js
similarity index 100%
rename from mango-cli/src/main/webapp/resources/js/jquery.autocomplete.min.js
rename to mango-play/public/resources/js/jquery.autocomplete.min.js
diff --git a/mango-cli/src/main/webapp/resources/js/jquery.min.js b/mango-play/public/resources/js/jquery.min.js
similarity index 100%
rename from mango-cli/src/main/webapp/resources/js/jquery.min.js
rename to mango-play/public/resources/js/jquery.min.js
diff --git a/mango-cli/src/main/webapp/resources/node_modules/pileup/LICENSE b/mango-play/public/resources/node_modules/pileup/LICENSE
similarity index 100%
rename from mango-cli/src/main/webapp/resources/node_modules/pileup/LICENSE
rename to mango-play/public/resources/node_modules/pileup/LICENSE
diff --git a/mango-cli/src/main/webapp/resources/node_modules/pileup/README.md b/mango-play/public/resources/node_modules/pileup/README.md
similarity index 100%
rename from mango-cli/src/main/webapp/resources/node_modules/pileup/README.md
rename to mango-play/public/resources/node_modules/pileup/README.md
diff --git a/mango-play/public/resources/node_modules/pileup/dist/lib/minid3.js b/mango-play/public/resources/node_modules/pileup/dist/lib/minid3.js
new file mode 100644
index 000000000..8623cc8c6
--- /dev/null
+++ b/mango-play/public/resources/node_modules/pileup/dist/lib/minid3.js
@@ -0,0 +1,1266 @@
+!function(){
+  var d3 = {version: "3.5.17"}; // semver
+var d3_document = this.document;
+
+function d3_documentElement(node) {
+  return node
+      && (node.ownerDocument // node is a Node
+      || node.document // node is a Window
+      || node).documentElement; // node is a Document
+}
+
+function d3_window(node) {
+  return node
+      && ((node.ownerDocument && node.ownerDocument.defaultView) // node is a Node
+      || (node.document && node) // node is a Window
+      || node.defaultView); // node is a Document
+}
+function d3_identity(d) {
+  return d;
+}
+// Copies a variable number of methods from source to target.
+d3.rebind = function(target, source) { + var i = 1, n = arguments.length, method; + while (++i < n) target[method = arguments[i]] = d3_rebind(target, source, source[method]); + return target; +}; + +// Method is assumed to be a standard D3 getter-setter: +// If passed with no arguments, gets the value. +// If passed with arguments, sets the value and returns the target. +function d3_rebind(target, source, method) { + return function() { + var value = method.apply(source, arguments); + return value === source ? target : value; + }; +} +function d3_vendorSymbol(object, name) { + if (name in object) return name; + name = name.charAt(0).toUpperCase() + name.slice(1); + for (var i = 0, n = d3_vendorPrefixes.length; i < n; ++i) { + var prefixName = d3_vendorPrefixes[i] + name; + if (prefixName in object) return prefixName; + } +} + +var d3_vendorPrefixes = ["webkit", "ms", "moz", "Moz", "o", "O"]; +function d3_class(ctor, properties) { + for (var key in properties) { + Object.defineProperty(ctor.prototype, key, { + value: properties[key], + enumerable: false + }); + } +} + +d3.map = function(object, f) { + var map = new d3_Map; + if (object instanceof d3_Map) { + object.forEach(function(key, value) { map.set(key, value); }); + } else if (Array.isArray(object)) { + var i = -1, + n = object.length, + o; + if (arguments.length === 1) while (++i < n) map.set(i, object[i]); + else while (++i < n) map.set(f.call(object, o = object[i], i), o); + } else { + for (var key in object) map.set(key, object[key]); + } + return map; +}; + +function d3_Map() { + this._ = Object.create(null); +} + +var d3_map_proto = "__proto__", + d3_map_zero = "\0"; + +d3_class(d3_Map, { + has: d3_map_has, + get: function(key) { + return this._[d3_map_escape(key)]; + }, + set: function(key, value) { + return this._[d3_map_escape(key)] = value; + }, + remove: d3_map_remove, + keys: d3_map_keys, + values: function() { + var values = []; + for (var key in this._) values.push(this._[key]); + return values; + }, + entries: function() { + var entries = []; + for (var key in this._) entries.push({key: d3_map_unescape(key), value: this._[key]}); + return entries; + }, + size: d3_map_size, + empty: d3_map_empty, + forEach: function(f) { + for (var key in this._) f.call(this, d3_map_unescape(key), this._[key]); + } +}); + +function d3_map_escape(key) { + return (key += "") === d3_map_proto || key[0] === d3_map_zero ? d3_map_zero + key : key; +} + +function d3_map_unescape(key) { + return (key += "")[0] === d3_map_zero ? 
key.slice(1) : key; +} + +function d3_map_has(key) { + return d3_map_escape(key) in this._; +} + +function d3_map_remove(key) { + return (key = d3_map_escape(key)) in this._ && delete this._[key]; +} + +function d3_map_keys() { + var keys = []; + for (var key in this._) keys.push(d3_map_unescape(key)); + return keys; +} + +function d3_map_size() { + var size = 0; + for (var key in this._) ++size; + return size; +} + +function d3_map_empty() { + for (var key in this._) return false; + return true; +} +var d3_arraySlice = [].slice, + d3_array = function(list) { return d3_arraySlice.call(list); }; // conversion for NodeLists +function d3_noop() {} + +d3.dispatch = function() { + var dispatch = new d3_dispatch, + i = -1, + n = arguments.length; + while (++i < n) dispatch[arguments[i]] = d3_dispatch_event(dispatch); + return dispatch; +}; + +function d3_dispatch() {} + +d3_dispatch.prototype.on = function(type, listener) { + var i = type.indexOf("."), + name = ""; + + // Extract optional namespace, e.g., "click.foo" + if (i >= 0) { + name = type.slice(i + 1); + type = type.slice(0, i); + } + + if (type) return arguments.length < 2 + ? this[type].on(name) + : this[type].on(name, listener); + + if (arguments.length === 2) { + if (listener == null) for (type in this) { + if (this.hasOwnProperty(type)) this[type].on(name, null); + } + return this; + } +}; + +function d3_dispatch_event(dispatch) { + var listeners = [], + listenerByName = new d3_Map; + + function event() { + var z = listeners, // defensive reference + i = -1, + n = z.length, + l; + while (++i < n) if (l = z[i].on) l.apply(this, arguments); + return dispatch; + } + + event.on = function(name, listener) { + var l = listenerByName.get(name), + i; + + // return the current listener, if any + if (arguments.length < 2) return l && l.on; + + // remove the old listener, if any (with copy-on-write) + if (l) { + l.on = null; + listeners = listeners.slice(0, i = listeners.indexOf(l)).concat(listeners.slice(i + 1)); + listenerByName.remove(name); + } + + // add the new listener, if any + if (listener) listeners.push(listenerByName.set(name, {on: listener})); + + return dispatch; + }; + + return event; +} + +d3.event = null; + +function d3_eventPreventDefault() { + d3.event.preventDefault(); +} + +function d3_eventSource() { + var e = d3.event, s; + while (s = e.sourceEvent) e = s; + return e; +} + +// Like d3.dispatch, but for custom events abstracting native UI events. These +// events have a target component (such as a brush), a target element (such as +// the svg:g element containing the brush) and the standard arguments `d` (the +// target element's data) and `i` (the selection index of the target element). +function d3_eventDispatch(target) { + var dispatch = new d3_dispatch, + i = 0, + n = arguments.length; + + while (++i < n) dispatch[arguments[i]] = d3_dispatch_event(dispatch); + + // Creates a dispatch context for the specified `thiz` (typically, the target + // DOM element that received the source event) and `argumentz` (typically, the + // data `d` and index `i` of the target element). The returned function can be + // used to dispatch an event to any registered listeners; the function takes a + // single argument as input, being the event to dispatch. The event must have + // a "type" attribute which corresponds to a type registered in the + // constructor. 
This context will automatically populate the "sourceEvent" and + // "target" attributes of the event, as well as setting the `d3.event` global + // for the duration of the notification. + dispatch.of = function(thiz, argumentz) { + return function(e1) { + try { + var e0 = + e1.sourceEvent = d3.event; + e1.target = target; + d3.event = e1; + dispatch[e1.type].apply(thiz, argumentz); + } finally { + d3.event = e0; + } + }; + }; + + return dispatch; +} +d3.requote = function(s) { + return s.replace(d3_requote_re, "\\$&"); +}; + +var d3_requote_re = /[\\\^\$\*\+\?\|\[\]\(\)\.\{\}]/g; +var d3_subclass = {}.__proto__? + +// Until ECMAScript supports array subclassing, prototype injection works well. +function(object, prototype) { + object.__proto__ = prototype; +}: + +// And if your browser doesn't support __proto__, we'll use direct extension. +function(object, prototype) { + for (var property in prototype) object[property] = prototype[property]; +}; + +function d3_selection(groups) { + d3_subclass(groups, d3_selectionPrototype); + return groups; +} + +var d3_select = function(s, n) { return n.querySelector(s); }, + d3_selectAll = function(s, n) { return n.querySelectorAll(s); }, + d3_selectMatches = function(n, s) { + var d3_selectMatcher = n.matches || n[d3_vendorSymbol(n, "matchesSelector")]; + d3_selectMatches = function(n, s) { + return d3_selectMatcher.call(n, s); + }; + return d3_selectMatches(n, s); + }; + +// Prefer Sizzle, if available. +if (typeof Sizzle === "function") { + d3_select = function(s, n) { return Sizzle(s, n)[0] || null; }; + d3_selectAll = Sizzle; + d3_selectMatches = Sizzle.matchesSelector; +} + +d3.selection = function() { + return d3.select(d3_document.documentElement); +}; + +var d3_selectionPrototype = d3.selection.prototype = []; + + +d3_selectionPrototype.select = function(selector) { + var subgroups = [], + subgroup, + subnode, + group, + node; + + selector = d3_selection_selector(selector); + + for (var j = -1, m = this.length; ++j < m;) { + subgroups.push(subgroup = []); + subgroup.parentNode = (group = this[j]).parentNode; + for (var i = -1, n = group.length; ++i < n;) { + if (node = group[i]) { + subgroup.push(subnode = selector.call(node, node.__data__, i, j)); + if (subnode && "__data__" in node) subnode.__data__ = node.__data__; + } else { + subgroup.push(null); + } + } + } + + return d3_selection(subgroups); +}; + +function d3_selection_selector(selector) { + return typeof selector === "function" ? selector : function() { + return d3_select(selector, this); + }; +} + +d3_selectionPrototype.selectAll = function(selector) { + var subgroups = [], + subgroup, + node; + + selector = d3_selection_selectorAll(selector); + + for (var j = -1, m = this.length; ++j < m;) { + for (var group = this[j], i = -1, n = group.length; ++i < n;) { + if (node = group[i]) { + subgroups.push(subgroup = d3_array(selector.call(node, node.__data__, i, j))); + subgroup.parentNode = node; + } + } + } + + return d3_selection(subgroups); +}; + +function d3_selection_selectorAll(selector) { + return typeof selector === "function" ? 
selector : function() { + return d3_selectAll(selector, this); + }; +} +var d3_nsXhtml = "http://www.w3.org/1999/xhtml"; + +var d3_nsPrefix = { + svg: "http://www.w3.org/2000/svg", + xhtml: d3_nsXhtml, + xlink: "http://www.w3.org/1999/xlink", + xml: "http://www.w3.org/XML/1998/namespace", + xmlns: "http://www.w3.org/2000/xmlns/" +}; + +d3.ns = { + prefix: d3_nsPrefix, + qualify: function(name) { + var i = name.indexOf(":"), prefix = name; + if (i >= 0 && (prefix = name.slice(0, i)) !== "xmlns") name = name.slice(i + 1); + return d3_nsPrefix.hasOwnProperty(prefix) ? {space: d3_nsPrefix[prefix], local: name} : name; + } +}; + +d3_selectionPrototype.attr = function(name, value) { + if (arguments.length < 2) { + + // For attr(string), return the attribute value for the first node. + if (typeof name === "string") { + var node = this.node(); + name = d3.ns.qualify(name); + return name.local + ? node.getAttributeNS(name.space, name.local) + : node.getAttribute(name); + } + + // For attr(object), the object specifies the names and values of the + // attributes to set or remove. The values may be functions that are + // evaluated for each element. + for (value in name) this.each(d3_selection_attr(value, name[value])); + return this; + } + + return this.each(d3_selection_attr(name, value)); +}; + +function d3_selection_attr(name, value) { + name = d3.ns.qualify(name); + + // For attr(string, null), remove the attribute with the specified name. + function attrNull() { + this.removeAttribute(name); + } + function attrNullNS() { + this.removeAttributeNS(name.space, name.local); + } + + // For attr(string, string), set the attribute with the specified name. + function attrConstant() { + this.setAttribute(name, value); + } + function attrConstantNS() { + this.setAttributeNS(name.space, name.local, value); + } + + // For attr(string, function), evaluate the function for each element, and set + // or remove the attribute as appropriate. + function attrFunction() { + var x = value.apply(this, arguments); + if (x == null) this.removeAttribute(name); + else this.setAttribute(name, x); + } + function attrFunctionNS() { + var x = value.apply(this, arguments); + if (x == null) this.removeAttributeNS(name.space, name.local); + else this.setAttributeNS(name.space, name.local, x); + } + + return value == null + ? (name.local ? attrNullNS : attrNull) : (typeof value === "function" + ? (name.local ? attrFunctionNS : attrFunction) + : (name.local ? attrConstantNS : attrConstant)); +} +function d3_collapse(s) { + return s.trim().replace(/\s+/g, " "); +} + +d3_selectionPrototype.classed = function(name, value) { + if (arguments.length < 2) { + + // For classed(string), return true only if the first node has the specified + // class or classes. Note that even if the browser supports DOMTokenList, it + // probably doesn't support it on SVG elements (which can be animated). + if (typeof name === "string") { + var node = this.node(), + n = (name = d3_selection_classes(name)).length, + i = -1; + if (value = node.classList) { + while (++i < n) if (!value.contains(name[i])) return false; + } else { + value = node.getAttribute("class"); + while (++i < n) if (!d3_selection_classedRe(name[i]).test(value)) return false; + } + return true; + } + + // For classed(object), the object specifies the names of classes to add or + // remove. The values may be functions that are evaluated for each element. 
+ for (value in name) this.each(d3_selection_classed(value, name[value])); + return this; + } + + // Otherwise, both a name and a value are specified, and are handled as below. + return this.each(d3_selection_classed(name, value)); +}; + +function d3_selection_classedRe(name) { + return new RegExp("(?:^|\\s+)" + d3.requote(name) + "(?:\\s+|$)", "g"); +} + +function d3_selection_classes(name) { + return (name + "").trim().split(/^|\s+/); +} + +// Multiple class names are allowed (e.g., "foo bar"). +function d3_selection_classed(name, value) { + name = d3_selection_classes(name).map(d3_selection_classedName); + var n = name.length; + + function classedConstant() { + var i = -1; + while (++i < n) name[i](this, value); + } + + // When the value is a function, the function is still evaluated only once per + // element even if there are multiple class names. + function classedFunction() { + var i = -1, x = value.apply(this, arguments); + while (++i < n) name[i](this, x); + } + + return typeof value === "function" + ? classedFunction + : classedConstant; +} + +function d3_selection_classedName(name) { + var re = d3_selection_classedRe(name); + return function(node, value) { + if (c = node.classList) return value ? c.add(name) : c.remove(name); + var c = node.getAttribute("class") || ""; + if (value) { + re.lastIndex = 0; + if (!re.test(c)) node.setAttribute("class", d3_collapse(c + " " + name)); + } else { + node.setAttribute("class", d3_collapse(c.replace(re, " "))); + } + }; +} + +d3_selectionPrototype.style = function(name, value, priority) { + var n = arguments.length; + if (n < 3) { + + // For style(object) or style(object, string), the object specifies the + // names and values of the attributes to set or remove. The values may be + // functions that are evaluated for each element. The optional string + // specifies the priority. + if (typeof name !== "string") { + if (n < 2) value = ""; + for (priority in name) this.each(d3_selection_style(priority, name[priority], value)); + return this; + } + + // For style(string), return the computed style value for the first node. + if (n < 2) { + var node = this.node(); + return d3_window(node).getComputedStyle(node, null).getPropertyValue(name); + } + + // For style(string, string) or style(string, function), use the default + // priority. The priority is ignored for style(string, null). + priority = ""; + } + + // Otherwise, a name, value and priority are specified, and handled as below. + return this.each(d3_selection_style(name, value, priority)); +}; + +function d3_selection_style(name, value, priority) { + + // For style(name, null) or style(name, null, priority), remove the style + // property with the specified name. The priority is ignored. + function styleNull() { + this.style.removeProperty(name); + } + + // For style(name, string) or style(name, string, priority), set the style + // property with the specified name, using the specified priority. + function styleConstant() { + this.style.setProperty(name, value, priority); + } + + // For style(name, function) or style(name, function, priority), evaluate the + // function for each element, and set or remove the style property as + // appropriate. When setting, use the specified priority. + function styleFunction() { + var x = value.apply(this, arguments); + if (x == null) this.style.removeProperty(name); + else this.style.setProperty(name, x, priority); + } + + return value == null + ? styleNull : (typeof value === "function" + ? 
styleFunction : styleConstant); +} + +d3_selectionPrototype.property = function(name, value) { + if (arguments.length < 2) { + + // For property(string), return the property value for the first node. + if (typeof name === "string") return this.node()[name]; + + // For property(object), the object specifies the names and values of the + // properties to set or remove. The values may be functions that are + // evaluated for each element. + for (value in name) this.each(d3_selection_property(value, name[value])); + return this; + } + + // Otherwise, both a name and a value are specified, and are handled as below. + return this.each(d3_selection_property(name, value)); +}; + +function d3_selection_property(name, value) { + + // For property(name, null), remove the property with the specified name. + function propertyNull() { + delete this[name]; + } + + // For property(name, string), set the property with the specified name. + function propertyConstant() { + this[name] = value; + } + + // For property(name, function), evaluate the function for each element, and + // set or remove the property as appropriate. + function propertyFunction() { + var x = value.apply(this, arguments); + if (x == null) delete this[name]; + else this[name] = x; + } + + return value == null + ? propertyNull : (typeof value === "function" + ? propertyFunction : propertyConstant); +} + +d3_selectionPrototype.text = function(value) { + return arguments.length + ? this.each(typeof value === "function" + ? function() { var v = value.apply(this, arguments); this.textContent = v == null ? "" : v; } : value == null + ? function() { this.textContent = ""; } + : function() { this.textContent = value; }) + : this.node().textContent; +}; + +d3_selectionPrototype.html = function(value) { + return arguments.length + ? this.each(typeof value === "function" + ? function() { var v = value.apply(this, arguments); this.innerHTML = v == null ? "" : v; } : value == null + ? function() { this.innerHTML = ""; } + : function() { this.innerHTML = value; }) + : this.node().innerHTML; +}; + +d3_selectionPrototype.append = function(name) { + name = d3_selection_creator(name); + return this.select(function() { + return this.appendChild(name.apply(this, arguments)); + }); +}; + +function d3_selection_creator(name) { + + function create() { + var document = this.ownerDocument, + namespace = this.namespaceURI; + return namespace === d3_nsXhtml && document.documentElement.namespaceURI === d3_nsXhtml + ? document.createElement(name) + : document.createElementNS(namespace, name); + } + + function createNS() { + return this.ownerDocument.createElementNS(name.space, name.local); + } + + return typeof name === "function" ? name + : (name = d3.ns.qualify(name)).local ? createNS + : create; +} + +d3_selectionPrototype.insert = function(name, before) { + name = d3_selection_creator(name); + before = d3_selection_selector(before); + return this.select(function() { + return this.insertBefore(name.apply(this, arguments), before.apply(this, arguments) || null); + }); +}; + +// TODO remove(selector)? +// TODO remove(node)? +// TODO remove(function)? 
+d3_selectionPrototype.remove = function() { + return this.each(d3_selectionRemove); +}; + +function d3_selectionRemove() { + var parent = this.parentNode; + if (parent) parent.removeChild(this); +} + +d3.set = function(array) { + var set = new d3_Set; + if (array) for (var i = 0, n = array.length; i < n; ++i) set.add(array[i]); + return set; +}; + +function d3_Set() { + this._ = Object.create(null); +} + +d3_class(d3_Set, { + has: d3_map_has, + add: function(key) { + this._[d3_map_escape(key += "")] = true; + return key; + }, + remove: d3_map_remove, + values: d3_map_keys, + size: d3_map_size, + empty: d3_map_empty, + forEach: function(f) { + for (var key in this._) f.call(this, d3_map_unescape(key)); + } +}); + +d3_selectionPrototype.data = function(value, key) { + var i = -1, + n = this.length, + group, + node; + + // If no value is specified, return the first value. + if (!arguments.length) { + value = new Array(n = (group = this[0]).length); + while (++i < n) { + if (node = group[i]) { + value[i] = node.__data__; + } + } + return value; + } + + function bind(group, groupData) { + var i, + n = group.length, + m = groupData.length, + n0 = Math.min(n, m), + updateNodes = new Array(m), + enterNodes = new Array(m), + exitNodes = new Array(n), + node, + nodeData; + + if (key) { + var nodeByKeyValue = new d3_Map, + keyValues = new Array(n), + keyValue; + + for (i = -1; ++i < n;) { + if (node = group[i]) { + if (nodeByKeyValue.has(keyValue = key.call(node, node.__data__, i))) { + exitNodes[i] = node; // duplicate selection key + } else { + nodeByKeyValue.set(keyValue, node); + } + keyValues[i] = keyValue; + } + } + + for (i = -1; ++i < m;) { + if (!(node = nodeByKeyValue.get(keyValue = key.call(groupData, nodeData = groupData[i], i)))) { + enterNodes[i] = d3_selection_dataNode(nodeData); + } else if (node !== true) { // no duplicate data key + updateNodes[i] = node; + node.__data__ = nodeData; + } + nodeByKeyValue.set(keyValue, true); + } + + for (i = -1; ++i < n;) { + if (i in keyValues && nodeByKeyValue.get(keyValues[i]) !== true) { + exitNodes[i] = group[i]; + } + } + } else { + for (i = -1; ++i < n0;) { + node = group[i]; + nodeData = groupData[i]; + if (node) { + node.__data__ = nodeData; + updateNodes[i] = node; + } else { + enterNodes[i] = d3_selection_dataNode(nodeData); + } + } + for (; i < m; ++i) { + enterNodes[i] = d3_selection_dataNode(groupData[i]); + } + for (; i < n; ++i) { + exitNodes[i] = group[i]; + } + } + + enterNodes.update + = updateNodes; + + enterNodes.parentNode + = updateNodes.parentNode + = exitNodes.parentNode + = group.parentNode; + + enter.push(enterNodes); + update.push(updateNodes); + exit.push(exitNodes); + } + + var enter = d3_selection_enter([]), + update = d3_selection([]), + exit = d3_selection([]); + + if (typeof value === "function") { + while (++i < n) { + bind(group = this[i], value.call(group, group.parentNode.__data__, i)); + } + } else { + while (++i < n) { + bind(group = this[i], value); + } + } + + update.enter = function() { return enter; }; + update.exit = function() { return exit; }; + return update; +}; + +function d3_selection_dataNode(data) { + return {__data__: data}; +} + +d3_selectionPrototype.datum = function(value) { + return arguments.length + ? 
this.property("__data__", value) + : this.property("__data__"); +}; + +d3_selectionPrototype.filter = function(filter) { + var subgroups = [], + subgroup, + group, + node; + + if (typeof filter !== "function") filter = d3_selection_filter(filter); + + for (var j = 0, m = this.length; j < m; j++) { + subgroups.push(subgroup = []); + subgroup.parentNode = (group = this[j]).parentNode; + for (var i = 0, n = group.length; i < n; i++) { + if ((node = group[i]) && filter.call(node, node.__data__, i, j)) { + subgroup.push(node); + } + } + } + + return d3_selection(subgroups); +}; + +function d3_selection_filter(selector) { + return function() { + return d3_selectMatches(this, selector); + }; +} + +d3_selectionPrototype.order = function() { + for (var j = -1, m = this.length; ++j < m;) { + for (var group = this[j], i = group.length - 1, next = group[i], node; --i >= 0;) { + if (node = group[i]) { + if (next && next !== node.nextSibling) next.parentNode.insertBefore(node, next); + next = node; + } + } + } + return this; +}; +d3.ascending = d3_ascending; + +function d3_ascending(a, b) { + return a < b ? -1 : a > b ? 1 : a >= b ? 0 : NaN; +} + +d3_selectionPrototype.sort = function(comparator) { + comparator = d3_selection_sortComparator.apply(this, arguments); + for (var j = -1, m = this.length; ++j < m;) this[j].sort(comparator); + return this.order(); +}; + +function d3_selection_sortComparator(comparator) { + if (!arguments.length) comparator = d3_ascending; + return function(a, b) { + return a && b ? comparator(a.__data__, b.__data__) : !a - !b; + }; +} + +d3_selectionPrototype.each = function(callback) { + return d3_selection_each(this, function(node, i, j) { + callback.call(node, node.__data__, i, j); + }); +}; + +function d3_selection_each(groups, callback) { + for (var j = 0, m = groups.length; j < m; j++) { + for (var group = groups[j], i = 0, n = group.length, node; i < n; i++) { + if (node = group[i]) callback(node, i, j); + } + } + return groups; +} + +d3_selectionPrototype.call = function(callback) { + var args = d3_array(arguments); + callback.apply(args[0] = this, args); + return this; +}; + +d3_selectionPrototype.empty = function() { + return !this.node(); +}; + +d3_selectionPrototype.node = function() { + for (var j = 0, m = this.length; j < m; j++) { + for (var group = this[j], i = 0, n = group.length; i < n; i++) { + var node = group[i]; + if (node) return node; + } + } + return null; +}; + +d3_selectionPrototype.size = function() { + var n = 0; + d3_selection_each(this, function() { ++n; }); + return n; +}; + +function d3_selection_enter(selection) { + d3_subclass(selection, d3_selection_enterPrototype); + return selection; +} + +var d3_selection_enterPrototype = []; + +d3.selection.enter = d3_selection_enter; +d3.selection.enter.prototype = d3_selection_enterPrototype; + +d3_selection_enterPrototype.append = d3_selectionPrototype.append; +d3_selection_enterPrototype.empty = d3_selectionPrototype.empty; +d3_selection_enterPrototype.node = d3_selectionPrototype.node; +d3_selection_enterPrototype.call = d3_selectionPrototype.call; +d3_selection_enterPrototype.size = d3_selectionPrototype.size; + + +d3_selection_enterPrototype.select = function(selector) { + var subgroups = [], + subgroup, + subnode, + upgroup, + group, + node; + + for (var j = -1, m = this.length; ++j < m;) { + upgroup = (group = this[j]).update; + subgroups.push(subgroup = []); + subgroup.parentNode = group.parentNode; + for (var i = -1, n = group.length; ++i < n;) { + if (node = group[i]) { + 
subgroup.push(upgroup[i] = subnode = selector.call(group.parentNode, node.__data__, i, j)); + subnode.__data__ = node.__data__; + } else { + subgroup.push(null); + } + } + } + + return d3_selection(subgroups); +}; + +d3_selection_enterPrototype.insert = function(name, before) { + if (arguments.length < 2) before = d3_selection_enterInsertBefore(this); + return d3_selectionPrototype.insert.call(this, name, before); +}; + +function d3_selection_enterInsertBefore(enter) { + var i0, j0; + return function(d, i, j) { + var group = enter[j].update, + n = group.length, + node; + if (j != j0) j0 = j, i0 = 0; + if (i >= i0) i0 = i + 1; + while (!(node = group[i0]) && ++i0 < n); + return node; + }; +} + +// TODO fast singleton implementation? +d3.select = function(node) { + var group; + if (typeof node === "string") { + group = [d3_select(node, d3_document)]; + group.parentNode = d3_document.documentElement; + } else { + group = [node]; + group.parentNode = d3_documentElement(node); + } + return d3_selection([group]); +}; + +d3.selectAll = function(nodes) { + var group; + if (typeof nodes === "string") { + group = d3_array(d3_selectAll(nodes, d3_document)); + group.parentNode = d3_document.documentElement; + } else { + group = d3_array(nodes); + group.parentNode = null; + } + return d3_selection([group]); +}; + +d3_selectionPrototype.on = function(type, listener, capture) { + var n = arguments.length; + if (n < 3) { + + // For on(object) or on(object, boolean), the object specifies the event + // types and listeners to add or remove. The optional boolean specifies + // whether the listener captures events. + if (typeof type !== "string") { + if (n < 2) listener = false; + for (capture in type) this.each(d3_selection_on(capture, type[capture], listener)); + return this; + } + + // For on(string), return the listener for the first node. + if (n < 2) return (n = this.node()["__on" + type]) && n._; + + // For on(string, function), use the default capture. + capture = false; + } + + // Otherwise, a type, listener and capture are specified, and handled as below. + return this.each(d3_selection_on(type, listener, capture)); +}; + +function d3_selection_on(type, listener, capture) { + var name = "__on" + type, + i = type.indexOf("."), + wrap = d3_selection_onListener; + + if (i > 0) type = type.slice(0, i); + var filter = d3_selection_onFilters.get(type); + if (filter) type = filter, wrap = d3_selection_onFilter; + + function onRemove() { + var l = this[name]; + if (l) { + this.removeEventListener(type, l, l.$); + delete this[name]; + } + } + + function onAdd() { + var l = wrap(listener, d3_array(arguments)); + onRemove.call(this); + this.addEventListener(type, this[name] = l, l.$ = capture); + l._ = listener; + } + + function removeAll() { + var re = new RegExp("^__on([^.]+)" + d3.requote(type) + "$"), + match; + for (var name in this) { + if (match = name.match(re)) { + var l = this[name]; + this.removeEventListener(match[1], l, l.$); + delete this[name]; + } + } + } + + return i + ? listener ? onAdd : onRemove + : listener ? d3_noop : removeAll; +} + +var d3_selection_onFilters = d3.map({ + mouseenter: "mouseover", + mouseleave: "mouseout" +}); + +if (d3_document) { + d3_selection_onFilters.forEach(function(k) { + if ("on" + k in d3_document) d3_selection_onFilters.remove(k); + }); +} + +function d3_selection_onListener(listener, argumentz) { + return function(e) { + var o = d3.event; // Events can be reentrant (e.g., focus). 
+ d3.event = e; + argumentz[0] = this.__data__; + try { + listener.apply(this, argumentz); + } finally { + d3.event = o; + } + }; +} + +function d3_selection_onFilter(listener, argumentz) { + var l = d3_selection_onListener(listener, argumentz); + return function(e) { + var target = this, related = e.relatedTarget; + if (!related || (related !== target && !(related.compareDocumentPosition(target) & 8))) { + l.call(target, e); + } + }; +} + +var d3_event_dragSelect, + d3_event_dragId = 0; + +function d3_event_dragSuppress(node) { + var name = ".dragsuppress-" + ++d3_event_dragId, + click = "click" + name, + w = d3.select(d3_window(node)) + .on("touchmove" + name, d3_eventPreventDefault) + .on("dragstart" + name, d3_eventPreventDefault) + .on("selectstart" + name, d3_eventPreventDefault); + + if (d3_event_dragSelect == null) { + d3_event_dragSelect = "onselectstart" in node ? false + : d3_vendorSymbol(node.style, "userSelect"); + } + + if (d3_event_dragSelect) { + var style = d3_documentElement(node).style, + select = style[d3_event_dragSelect]; + style[d3_event_dragSelect] = "none"; + } + + return function(suppressClick) { + w.on(name, null); + if (d3_event_dragSelect) style[d3_event_dragSelect] = select; + if (suppressClick) { // suppress the next click, but only if it’s immediate + var off = function() { w.on(click, null); }; + w.on(click, function() { d3_eventPreventDefault(); off(); }, true); + setTimeout(off, 0); + } + }; +} + +d3.mouse = function(container) { + return d3_mousePoint(container, d3_eventSource()); +}; + +// https://bugs.webkit.org/show_bug.cgi?id=44083 +var d3_mouse_bug44083 = this.navigator && /WebKit/.test(this.navigator.userAgent) ? -1 : 0; + +function d3_mousePoint(container, e) { + if (e.changedTouches) e = e.changedTouches[0]; + var svg = container.ownerSVGElement || container; + if (svg.createSVGPoint) { + var point = svg.createSVGPoint(); + if (d3_mouse_bug44083 < 0) { + var window = d3_window(container); + if (window.scrollX || window.scrollY) { + svg = d3.select("body").append("svg").style({ + position: "absolute", + top: 0, + left: 0, + margin: 0, + padding: 0, + border: "none" + }, "important"); + var ctm = svg[0][0].getScreenCTM(); + d3_mouse_bug44083 = !(ctm.f || ctm.e); + svg.remove(); + } + } + if (d3_mouse_bug44083) point.x = e.pageX, point.y = e.pageY; + else point.x = e.clientX, point.y = e.clientY; + point = point.matrixTransform(container.getScreenCTM().inverse()); + return [point.x, point.y]; + } + var rect = container.getBoundingClientRect(); + return [e.clientX - rect.left - container.clientLeft, e.clientY - rect.top - container.clientTop]; +}; + +d3.touch = function(container, touches, identifier) { + if (arguments.length < 3) identifier = touches, touches = d3_eventSource().changedTouches; + if (touches) for (var i = 0, n = touches.length, touch; i < n; ++i) { + if ((touch = touches[i]).identifier === identifier) { + return d3_mousePoint(container, touch); + } + } +}; +d3.behavior = {}; + +d3.behavior.drag = function() { + var event = d3_eventDispatch(drag, "drag", "dragstart", "dragend"), + origin = null, + mousedown = dragstart(d3_noop, d3.mouse, d3_window, "mousemove", "mouseup"), + touchstart = dragstart(d3_behavior_dragTouchId, d3.touch, d3_identity, "touchmove", "touchend"); + + function drag() { + this.on("mousedown.drag", mousedown) + .on("touchstart.drag", touchstart); + } + + function dragstart(id, position, subject, move, end) { + return function() { + var that = this, + target = d3.event.target.correspondingElement || 
d3.event.target, + parent = that.parentNode, + dispatch = event.of(that, arguments), + dragged = 0, + dragId = id(), + dragName = ".drag" + (dragId == null ? "" : "-" + dragId), + dragOffset, + dragSubject = d3.select(subject(target)).on(move + dragName, moved).on(end + dragName, ended), + dragRestore = d3_event_dragSuppress(target), + position0 = position(parent, dragId); + + if (origin) { + dragOffset = origin.apply(that, arguments); + dragOffset = [dragOffset.x - position0[0], dragOffset.y - position0[1]]; + } else { + dragOffset = [0, 0]; + } + + dispatch({type: "dragstart"}); + + function moved() { + var position1 = position(parent, dragId), dx, dy; + if (!position1) return; // this touch didn’t move + + dx = position1[0] - position0[0]; + dy = position1[1] - position0[1]; + dragged |= dx | dy; + position0 = position1; + + dispatch({ + type: "drag", + x: position1[0] + dragOffset[0], + y: position1[1] + dragOffset[1], + dx: dx, + dy: dy + }); + } + + function ended() { + if (!position(parent, dragId)) return; // this touch didn’t end + dragSubject.on(move + dragName, null).on(end + dragName, null); + dragRestore(dragged); + dispatch({type: "dragend"}); + } + }; + } + + drag.origin = function(x) { + if (!arguments.length) return origin; + origin = x; + return drag; + }; + + return d3.rebind(drag, event, "on"); +}; + +// While it is possible to receive a touchstart event with more than one changed +// touch, the event is only shared by touches on the same target; for new +// touches targetting different elements, multiple touchstart events are +// received even when the touches start simultaneously. Since multiple touches +// cannot move the same target to different locations concurrently without +// tearing the fabric of spacetime, we allow the first touch to win. +function d3_behavior_dragTouchId() { + return d3.event.changedTouches[0].identifier; +} + if (typeof define === "function" && define.amd) this.d3 = d3, define(d3); + else if (typeof module === "object" && module.exports) module.exports = d3; + else this.d3 = d3; +}(); diff --git a/mango-play/public/resources/node_modules/pileup/dist/main/AbstractFile.js b/mango-play/public/resources/node_modules/pileup/dist/main/AbstractFile.js new file mode 100644 index 000000000..877e2daee --- /dev/null +++ b/mango-play/public/resources/node_modules/pileup/dist/main/AbstractFile.js @@ -0,0 +1,43 @@ +/** + * AbstractFile is an abstract representation of a file. There are two implementation: + * 1. RemoteFile - representation of a file on a remote server which can be + * fetched in chunks, e.g. using a Range request. + * 2. LocalStringFile is a representation of a file that was created from input string. + * Used for testing and small input files. 
+ * + */ +'use strict'; + +//import Q from 'q'; +var _createClass = (function () {function defineProperties(target, props) {for (var i = 0; i < props.length; i++) {var descriptor = props[i];descriptor.enumerable = descriptor.enumerable || false;descriptor.configurable = true;if ("value" in descriptor) descriptor.writable = true;Object.defineProperty(target, descriptor.key, descriptor);}}return function (Constructor, protoProps, staticProps) {if (protoProps) defineProperties(Constructor.prototype, protoProps);if (staticProps) defineProperties(Constructor, staticProps);return Constructor;};})();function _classCallCheck(instance, Constructor) {if (!(instance instanceof Constructor)) {throw new TypeError("Cannot call a class as a function");}}var +AbstractFile = (function () { + function AbstractFile() {_classCallCheck(this, AbstractFile);}_createClass(AbstractFile, [{ key: "getBytes", + //how to prevent instantation of this class??? + //this code doesn't pass npm run flow + // if (new.target === AbstractFile) { + // throw new TypeError("Cannot construct AbstractFile instances directly"); + // } + value: + + function getBytes(start, length) {//: Q.Promise { + throw new TypeError("Method getBytes is not implemented");} + + + // Read the entire file -- not recommended for large files! + }, { key: "getAll", value: function getAll() {//: Q.Promise { + throw new TypeError("Method getAll is not implemented");} + + + // Reads the entire file as a string (not an ArrayBuffer). + // This does not use the cache. + }, { key: "getAllString", value: function getAllString() {//: Q.Promise { + throw new TypeError("Method getAllString is not implemented");} + + + // Returns a promise for the number of bytes in the remote file. + }, { key: "getSize", value: function getSize() {//: Q.Promise { + throw new TypeError("Method getSize is not implemented");} }]);return AbstractFile;})(); + + + +module.exports = AbstractFile; \ No newline at end of file diff --git a/mango-play/public/resources/node_modules/pileup/dist/pileup.js b/mango-play/public/resources/node_modules/pileup/dist/pileup.js new file mode 100644 index 000000000..b5ea2665b --- /dev/null +++ b/mango-play/public/resources/node_modules/pileup/dist/pileup.js @@ -0,0 +1,717 @@ +(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.pileup = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o").attr(t);this.setElement(n,!1)}}}),e.sync=function(t,n,s){var r=E[t];i.defaults(s||(s={}),{emulateHTTP:e.emulateHTTP,emulateJSON:e.emulateJSON});var 
a={type:r,dataType:"json"};if(s.url||(a.url=i.result(n,"url")||j()),null!=s.data||!n||"create"!==t&&"update"!==t&&"patch"!==t||(a.contentType="application/json",a.data=JSON.stringify(s.attrs||n.toJSON(s))),s.emulateJSON&&(a.contentType="application/x-www-form-urlencoded",a.data=a.data?{model:a.data}:{}),s.emulateHTTP&&("PUT"===r||"DELETE"===r||"PATCH"===r)){a.type="POST",s.emulateJSON&&(a.data._method=r);var o=s.beforeSend;s.beforeSend=function(t){if(t.setRequestHeader("X-HTTP-Method-Override",r),o)return o.apply(this,arguments)}}"GET"===a.type||s.emulateJSON||(a.processData=!1),"PATCH"===a.type&&x&&(a.xhr=function(){return new ActiveXObject("Microsoft.XMLHTTP")});var h=s.xhr=e.ajax(i.extend(a,s));return n.trigger("request",n,h,s),h};var x=!("undefined"==typeof window||!window.ActiveXObject||window.XMLHttpRequest&&(new XMLHttpRequest).dispatchEvent),E={create:"POST",update:"PUT",patch:"PATCH",delete:"DELETE",read:"GET"};e.ajax=function(){return e.$.ajax.apply(e.$,arguments)};var k=e.Router=function(t){t||(t={}),t.routes&&(this.routes=t.routes),this._bindRoutes(),this.initialize.apply(this,arguments)},T=/\((.*?)\)/g,$=/(\(\?)?:\w+/g,S=/\*\w+/g,H=/[\-{}\[\]+?.,\\\^$|#\s]/g;i.extend(k.prototype,o,{initialize:function(){},route:function(t,n,s){i.isRegExp(t)||(t=this._routeToRegExp(t)),i.isFunction(n)&&(s=n,n=""),s||(s=this[n]);var r=this;return e.history.route(t,function(i){var a=r._extractParameters(t,i);r.execute(s,a),r.trigger.apply(r,["route:"+n].concat(a)),r.trigger("route",n,a),e.history.trigger("route",r,n,a)}),this},execute:function(t,e){t&&t.apply(this,e)},navigate:function(t,i){return e.history.navigate(t,i),this},_bindRoutes:function(){if(this.routes){this.routes=i.result(this,"routes");for(var t,e=i.keys(this.routes);null!=(t=e.pop());)this.route(t,this.routes[t])}},_routeToRegExp:function(t){return t=t.replace(H,"\\$&").replace(T,"(?:$1)?").replace($,function(t,e){return e?t:"([^/?]+)"}).replace(S,"([^?]*?)"),new RegExp("^"+t+"(?:\\?([\\s\\S]*))?$")},_extractParameters:function(t,e){var n=t.exec(e).slice(1);return i.map(n,function(t,e){return e===n.length-1?t||null:t?decodeURIComponent(t):null})}});var A=e.History=function(){this.handlers=[],i.bindAll(this,"checkUrl"),"undefined"!=typeof window&&(this.location=window.location,this.history=window.history)},I=/^[#\/]|\s+$/g,N=/^\/+|\/+$/g,R=/msie [\w.]+/,O=/\/$/,P=/#.*$/;A.started=!1,i.extend(A.prototype,o,{interval:50,atRoot:function(){return this.location.pathname.replace(/[^\/]$/,"$&/")===this.root},getHash:function(t){var e=(t||this).location.href.match(/#(.*)$/);return e?e[1]:""},getFragment:function(t,e){if(null==t)if(this._hasPushState||!this._wantsHashChange||e){t=decodeURI(this.location.pathname+this.location.search);var i=this.root.replace(O,"");t.indexOf(i)||(t=t.slice(i.length))}else t=this.getHash();return t.replace(I,"")},start:function(t){if(A.started)throw new Error("Backbone.history has already been started");A.started=!0,this.options=i.extend({root:"/"},this.options,t),this.root=this.options.root,this._wantsHashChange=this.options.hashChange!==!1,this._wantsPushState=!!this.options.pushState,this._hasPushState=!!(this.options.pushState&&this.history&&this.history.pushState);var n=this.getFragment(),s=document.documentMode,r=R.exec(navigator.userAgent.toLowerCase())&&(!s||s<=7);if(this.root=("/"+this.root+"/").replace(N,"/"),r&&this._wantsHashChange){var a=e.$('