Skip to content

Commit

Permalink
Merge branch 'develop' into update/rawls-model-v0.0.215-SNAP
Browse files Browse the repository at this point in the history
  • Loading branch information
davidangb authored Nov 5, 2024
2 parents c12168a + c44d652 commit b669444
Show file tree
Hide file tree
Showing 28 changed files with 802 additions and 26 deletions.
2 changes: 1 addition & 1 deletion automation/Dockerfile-tests
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM sbtscala/scala-sbt:eclipse-temurin-jammy-17.0.10_7_1.10.4_2.13.15
FROM sbtscala/scala-sbt:eclipse-temurin-17.0.13_11_1.10.5_2.13.15

COPY src /app/src
COPY test.sh /app
Expand Down
2 changes: 1 addition & 1 deletion automation/project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version = 1.10.4
sbt.version = 1.10.5
2 changes: 1 addition & 1 deletion jenkins/ittests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ set -eux
./docker/run-es.sh start

# execute tests, overriding elasticsearch.urls to point at the linked container
SBT_IMAGE=sbtscala/scala-sbt:eclipse-temurin-jammy-17.0.10_7_1.10.4_2.13.15
SBT_IMAGE=sbtscala/scala-sbt:eclipse-temurin-17.0.13_11_1.10.5_2.13.15
docker run --rm \
--link elasticsearch-ittest:elasticsearch-ittest \
-v sbt-cache:/root/.sbt \
Expand Down
2 changes: 1 addition & 1 deletion local-dev/templates/docker-rsync-local-orch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ start_server () {
-p 5051:5051 \
--network=fc-orch \
-e JAVA_OPTS="$DOCKER_JAVA_OPTS" \
sbtscala/scala-sbt:eclipse-temurin-jammy-17.0.10_7_1.10.4_2.13.15 \
sbtscala/scala-sbt:eclipse-temurin-17.0.13_11_1.10.5_2.13.15 \
bash -c "git config --global --add safe.directory /app && sbt \~reStart"

docker cp config/firecloud-account.pem orch-sbt:/etc/firecloud-account.pem
Expand Down
8 changes: 4 additions & 4 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ object Dependencies {
// elasticsearch requires log4j, but we redirect log4j to logback
"org.apache.logging.log4j" % "log4j-to-slf4j" % "2.24.1",
"ch.qos.logback" % "logback-classic" % "1.5.12",
"io.sentry" % "sentry-logback" % "7.15.0",
"io.sentry" % "sentry-logback" % "7.16.0",
"com.typesafe.scala-logging" %% "scala-logging" % "3.9.5",

"org.parboiled" % "parboiled-core" % "1.4.1",
Expand All @@ -56,11 +56,11 @@ object Dependencies {
excludeGuava("org.broadinstitute.dsde.workbench" %% "workbench-util" % s"0.10-$workbenchLibsHash"),
"org.broadinstitute.dsde.workbench" %% "workbench-google2" % s"0.36-$workbenchLibsHash",
"org.broadinstitute.dsde.workbench" %% "workbench-oauth2" % s"0.8-$workbenchLibsHash",
"org.broadinstitute.dsde.workbench" %% "sam-client" % "v0.0.287",
"org.broadinstitute.dsde.workbench" %% "sam-client" % "v0.0.296",
"org.broadinstitute.dsde.workbench" %% "workbench-notifications" %s"0.8-$workbenchLibsHash",
"org.databiosphere" % "workspacedataservice-client-okhttp-jakarta" % "0.2.167-SNAPSHOT",
"bio.terra" % "externalcreds-client-resttemplate" % "1.44.0-20240725.201427-1" excludeAll(excludeSpring, excludeSpringBoot),
"org.springframework" % "spring-web" % "6.1.13" excludeAll(excludeSpringBoot, excludeSpringJcl),
"org.springframework" % "spring-web" % "6.1.14" excludeAll(excludeSpringBoot, excludeSpringJcl),

"com.typesafe.akka" %% "akka-actor" % akkaV,
"com.typesafe.akka" %% "akka-slf4j" % akkaV,
Expand Down Expand Up @@ -96,7 +96,7 @@ object Dependencies {
"org.scalatest" %% "scalatest" % "3.2.19" % "test",
"org.mock-server" % "mockserver-netty-no-dependencies" % "5.15.0" % "test",
// provides testing mocks
"com.google.cloud" % "google-cloud-nio" % "0.127.25" % "test",
"com.google.cloud" % "google-cloud-nio" % "0.127.26" % "test",
"org.scalatestplus" %% "mockito-4-5" % "3.2.12.0" % "test"
)
}
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.10.4
sbt.version=1.10.5
2 changes: 1 addition & 1 deletion script/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ function make_jar()

docker run --rm -e GIT_MODEL_HASH=${GIT_MODEL_HASH} \
-v $PWD:/working -w /working -v jar-cache:/root/.ivy -v jar-cache:/root/.ivy2 \
sbtscala/scala-sbt:eclipse-temurin-jammy-17.0.10_7_1.10.4_2.13.15 /working/src/docker/install.sh /working
sbtscala/scala-sbt:eclipse-temurin-17.0.13_11_1.10.5_2.13.15 /working/src/docker/install.sh /working
}

function docker_cmd()
Expand Down
2 changes: 1 addition & 1 deletion script/build_jar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ docker run --rm -e GIT_MODEL_HASH=${GIT_MODEL_HASH} \
-v $PWD:/working \
-v jar-cache:/root/.ivy -v jar-cache:/root/.ivy2 \
-w /working \
sbtscala/scala-sbt:eclipse-temurin-jammy-17.0.10_7_1.10.4_2.13.15 /working/src/docker/clean_install.sh /working
sbtscala/scala-sbt:eclipse-temurin-17.0.13_11_1.10.5_2.13.15 /working/src/docker/clean_install.sh /working

EXIT_CODE=$?

Expand Down
4 changes: 4 additions & 0 deletions src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,7 @@ googlecloud {
"153601": 0.045
}
}

firecloud {
max-filematching-bucket-files = 25000
}
90 changes: 83 additions & 7 deletions src/main/resources/swagger/api-docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3688,6 +3688,55 @@ paths:
description: Internal Server Error
content: {}
x-passthrough: false
/api/workspaces/{workspaceNamespace}/{workspaceName}/entities/{entityType}/paired-tsv:
post:
tags:
- Entities
summary: |
Download a TSV of files in the workspace's bucket, paired by naming convention
description: |
Lists the files in the workspace's bucket, filtered to a specified prefix. Then,
attempts to pair those files to each other based on well-known naming conventions.
Downloads a TSV containing the result of those pairings.
operationId: bucketPairedTSV
parameters:
- $ref: '#/components/parameters/workspaceNamespaceParam'
- $ref: '#/components/parameters/workspaceNameParam'
- $ref: '#/components/parameters/entityTypeParam'
requestBody:
content:
'application/json':
schema:
$ref: '#/components/schemas/FileMatchingOptions'
examples:
minimally-required:
value:
prefix: my-bucket-prefix
disable-recursion:
value:
prefix: my-bucket-prefix/
recursive: false
rename-columns:
value:
prefix: my-bucket-prefix
read1Name: my-column-name-one
read2Name: my-column-name-two
required: true
responses:
200:
description: URL to saved file
content:
text/plain:
schema:
type: string
format: binary
404:
description: Workspace or entity type does not exist
content: {}
500:
description: Internal Server Error
content: {}
x-passthrough: false
/api/workspaces/{workspaceNamespace}/{workspaceName}/entityQuery/{entityType}:
get:
tags:
Expand Down Expand Up @@ -7473,6 +7522,28 @@ components:
ExtendedEnabled:
allOf:
- $ref: '#/components/schemas/Enabled'
FileMatchingOptions:
type: object
required:
- prefix
properties:
prefix:
type: string
description: |
Bucket prefix in which to look. If `recursive` is false, this must include a trailing
slash when specifying a subdirectory.
read1Name:
type: string
description: column name to use for the primary "read 1" file
default: read1
read2Name:
type: string
description: column name to use for the matching "read 2" file
default: read2
recursive:
type: boolean
description: whether to list files in subdirectories of the prefix
default: true
FireCloudPermission:
required:
- role
Expand Down Expand Up @@ -7749,40 +7820,45 @@ components:
format: int32
default: 0
MethodQuery:
required:
- namespace
- name
- payload
- entityType
type: object
properties:
namespace:
type: string
description: Namespace which contains AgoraEntity.
default: YOUR_NAMESPACE
example: YOUR_NAMESPACE
name:
type: string
description: Name of the AgoraEntity.
default: BWA
example: BWA
synopsis:
type: string
description: Synopsis which contains AgoraEntity.
default: Quickly aligns short nucleotide sequences.
example: Quickly aligns short nucleotide sequences.
snapshotComment:
type: string
description: Snapshot comment of AgoraEntity
default: Improved spline reticulation
example: Improved spline reticulation
documentation:
type: string
description: Documentation of the AgoraEntity.
default: |
example: |
BWA is a software package for mapping low-divergent sequences
against a large reference genome, such as the human genome.
It consists of three algorithms: BWA-backtrack, BWA-SW and BWA-MEM.
payload:
type: string
description: Payload of method -- must be in WDL format
default: |
example: |
task wc {File in_file command { cat ${in_file} | wc -l } output { Int count = read_int(stdout()) }}
entityType:
type: string
description: Type of the AgoraEntity -- Task or Workflow.
default: Task
example: Task
MethodShort:
required:
- managers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ object FireCloudConfig {
lazy val supportDomain = firecloud.getString("supportDomain")
lazy val supportPrefix = firecloud.getString("supportPrefix")
lazy val userAdminAccount = firecloud.getString("userAdminAccount")
// Integer read from the `max-filematching-bucket-files` key of the `firecloud` config section;
// reference.conf supplies a default of 25000.
// NOTE(review): presumably an upper bound on the number of bucket files considered for file matching -
// confirm against the code that consumes this value.
lazy val maxFileMatchingFileCount = firecloud.getInt("max-filematching-bucket-files")
}

object Shibboleth {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,6 @@ trait GoogleServicesDAO extends ReportsSubsystemStatus {
def publishMessages(fullyQualifiedTopic: String, messages: Seq[String]): Future[Unit]

def getBucket(bucketName: String, petKey: String): Option[Bucket]

/**
  * List the objects in a bucket.
  *
  * @param bucketName the bucket to list
  * @param prefix     optional object-name prefix used to filter the listing
  *                   (NOTE(review): handling of `None` is up to the implementation - confirm)
  * @param recursive  presumably whether objects in "subdirectories" under the prefix are included - confirm
  *                   against the implementation
  * @return the names of the objects found
  */
def listBucket(bucketName: GcsBucketName, prefix: Option[String], recursive: Boolean): List[GcsObjectName]
}
Original file line number Diff line number Diff line change
Expand Up @@ -400,4 +400,22 @@ class HttpGoogleServicesDAO(priceListUrl: String, defaultPriceList: GooglePriceL
getScopedServiceAccountCredentials(firecloudAdminSACreds, authScopes)
.refreshAccessToken()
.getTokenValue

/**
  * List the names of the objects in a bucket, optionally filtered by prefix.
  *
  * The listing is executed synchronously: the calling thread waits until all results are collected.
  *
  * @param bucketName the bucket to list
  * @param prefix     object-name prefix to filter by; `None` is treated as the empty prefix
  * @param recursive  forwarded to the storage service as `isRecursive`
  * @return every object name returned by the storage service for this bucket/prefix
  */
override def listBucket(bucketName: GcsBucketName,
                        prefix: Option[String],
                        recursive: Boolean = true
): List[GcsObjectName] = {
  // an absent prefix means "no filtering", expressed to the storage service as the empty string
  val effectivePrefix = prefix.getOrElse("")

  getStorageResource
    .use { storageService =>
      // listObjectsWithPrefix handles paginating through results if there are more results than
      // the `maxPageSize` setting; we gather every page into a single List.
      storageService
        .listObjectsWithPrefix(bucketName, effectivePrefix, maxPageSize = 2000, isRecursive = recursive)
        .compile
        .toList
    }
    // execute the listing and wait for its result
    .unsafeRunSync()
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package org.broadinstitute.dsde.firecloud.filematch

import com.typesafe.scalalogging.LazyLogging
import org.broadinstitute.dsde.firecloud.filematch.result.{FailedMatchResult, FileMatchResult, SuccessfulMatchResult}
import org.broadinstitute.dsde.firecloud.filematch.strategy.{FileRecognitionStrategy, IlluminaPairedEndStrategy}

import java.nio.file.Path

/**
* Given a list of files, pair those files based on their naming conventions.
* At the time of writing, this involves recognizing Illumina single end and paired end read patterns
* such as those defined at https://support.illumina.com/help/BaseSpace_Sequence_Hub_OLH_009008_2/Source/Informatics/BS/NamingConvention_FASTQ-files-swBS.htm
*
* In the future, we may support additional naming conventions
*/
class FileMatcher extends LazyLogging {

  // the list of recognition strategies to use; strategies are attempted in list order
  private val matchingStrategies: List[FileRecognitionStrategy] = List(new IlluminaPairedEndStrategy())

  /**
    * Given a list of files, pair up those files according to our known recognition strategies.
    *
    * @param pathList the list of files to inspect
    * @return pairing results, sorted by each result's `firstFile`
    */
  def pairPaths(pathList: List[Path]): List[FileMatchResult] =
    performPairing(pathList)

  /**
    * Given a list of files, pair up those files according to our known recognition strategies.
    *
    * @param fileList the list of files to inspect, as Strings
    * @return pairing results, sorted by each result's `firstFile`
    */
  def pairFiles(fileList: List[String]): List[FileMatchResult] = {
    // convert fileList to pathList
    val pathList = fileList.map(file => new java.io.File(file).toPath)
    pairPaths(pathList)
  }

  /**
    * Implementation for file pairing. This executes in three steps:
    *   1. Use our known file recognition strategies to identify all "read 1" files in the file list
    *   2. Search for all "read 2" files in the file list which match the previously-identified "read 1"s
    *   3. Handle the remaining files which are not recognized as either "read 1" or "read 2"
    *
    * @param pathList the list of files to inspect
    * @return pairing results, sorted by each result's `firstFile`
    */
  private def performPairing(pathList: List[Path]): List[FileMatchResult] = {
    // find every path in the incoming pathList that is recognized by one of our known patterns
    val desiredPairings: List[SuccessfulMatchResult] = findFirstFiles(pathList)

    // remove the recognized firstFiles from the outstanding pathList
    val remainingPaths: List[Path] = pathList diff desiredPairings.map(_.firstFile)

    // process the recognized "read 1" files, and look for their desired pairings in the outstanding pathList.
    // this yields the SuccessfulMatchResult itself when the desired pairing is found, or its `toPartial`
    // form when the desired pairing is not found
    val pairingResults: List[FileMatchResult] = findSecondFiles(remainingPaths, desiredPairings)

    // remove the recognized "read 2" files from the outstanding pathList
    val matchedSecondFiles = pairingResults.collect { case s: SuccessfulMatchResult => s.secondFile }
    val unrecognizedPaths: List[Path] = remainingPaths diff matchedSecondFiles

    // translate the unrecognized paths into a FileMatchResult
    val unrecognizedResults: List[FailedMatchResult] = unrecognizedPaths.map(path => FailedMatchResult(path))

    // return results, sorted by firstFile
    (pairingResults ++ unrecognizedResults).sortBy(r => r.firstFile)
  }

  /**
    * Find every path in the incoming pathList that is recognized as a "read 1" by our known patterns.
    *
    * @param pathList the list of files to inspect
    * @return one SuccessfulMatchResult per recognized path, in the order the paths appear in `pathList`
    */
  private def findFirstFiles(pathList: List[Path]): List[SuccessfulMatchResult] =
    // run the strategies first, then keep only the hits; an explicit `case` partial function avoids relying on
    // the compiler synthesizing a PartialFunction from a plain `x => f(x) match { ... }` lambda
    pathList
      .map(tryPairingStrategies)
      .collect { case success: SuccessfulMatchResult => success }

  /**
    * For each desired pairing, check whether its expected "read 2" file actually exists in the pathList.
    *
    * @param pathList        the list of files to inspect
    * @param desiredPairings the pairings whose "read 2" files should be looked for in the pathList
    * @return for each desired pairing, in order: the pairing itself if its `secondFile` is present in
    *         `pathList`, otherwise the pairing's `toPartial` form
    */
  private def findSecondFiles(pathList: List[Path],
                              desiredPairings: List[SuccessfulMatchResult]
  ): List[FileMatchResult] = {
    // build the lookup once: membership tests are then constant-time, instead of a linear scan of
    // pathList for every desired pairing (quadratic for large buckets)
    val availablePaths: Set[Path] = pathList.toSet

    desiredPairings.map { desiredPairing =>
      // search for the desired pairing's secondFile in the set of actual files
      if (availablePaths.contains(desiredPairing.secondFile)) desiredPairing
      else desiredPairing.toPartial
    }
  }

  /**
    * Attempt all the configured file recognition strategies against the supplied file.
    *
    * @param file the file to try to recognize
    * @return the first SuccessfulMatchResult produced by a strategy, if any; FailedMatchResult if no
    *         strategy recognizes the file
    */
  private def tryPairingStrategies(file: Path): FileMatchResult =
    // the iterator keeps evaluation lazy: strategies after the first hit are never invoked
    matchingStrategies.iterator
      .map(strategy => strategy.matchFirstFile(file))
      .collectFirst { case success: SuccessfulMatchResult => success }
      // the current file is not recognized by any strategy
      .getOrElse(FailedMatchResult(file))

}
Loading

0 comments on commit b669444

Please sign in to comment.