Skip to content

Commit

Permalink
fix rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
davidangb committed Oct 30, 2024
1 parent c787deb commit a8e2f71
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -400,11 +400,13 @@ class HttpGoogleServicesDAO(priceListUrl: String, defaultPriceList: GooglePriceL
getScopedServiceAccountCredentials(firecloudAdminSACreds, authScopes)
.refreshAccessToken()
.getTokenValue
}

override def listBucket(bucketName: GcsBucketName, prefix: Option[String]): List[GcsObjectName] = {
val listAttempt = getStorageResource.use { storageService =>
storageService.listObjectsWithPrefix(bucketName, prefix.getOrElse(""), maxPageSize = 5000, isRecursive = true).compile.toList
storageService
.listObjectsWithPrefix(bucketName, prefix.getOrElse(""), maxPageSize = 5000, isRecursive = true)
.compile
.toList
}

// TODO: recurse if there are more files in the bucket and we need to paginate? Does
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ object ExportEntitiesByTypeActor {
exportArgs.model,
system
)


// *******************************************************************************************************************
// POC of file-matching for AJ-2025
Expand Down Expand Up @@ -389,8 +388,7 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
// those files based on Illumina single-end and paired-end read patterns
// *******************************************************************************************************************


def matchBucketFiles(matchingOptions: FileMatchingOptions): Future[String] = {
def matchBucketFiles(matchingOptions: FileMatchingOptions): Future[String] =
// retrieve workspace so we can get its bucket
rawlsDAO.getWorkspace(workspaceNamespace, workspaceName)(userInfo) map { workspaceResponse =>
val workspaceBucket = GcsBucketName(workspaceResponse.workspace.bucketName)
Expand All @@ -414,8 +412,10 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
val entities: List[Entity] = pairs.map { pair =>
val attributes = Map(
AttributeName.withDefaultNS("read1") -> AttributeString(urlmap(pair.mainFile)),
AttributeName.withDefaultNS("read2") -> AttributeString(pair.matchedFile.map{f => urlmap(f)}.getOrElse("")),
AttributeName.withDefaultNS("detectedType") -> AttributeString(pair.baseName.getOrElse("")),
AttributeName.withDefaultNS("read2") -> AttributeString(
pair.matchedFile.map(f => urlmap(f)).getOrElse("")
),
AttributeName.withDefaultNS("detectedType") -> AttributeString(pair.baseName.getOrElse(""))
)
Entity(pair.id.getOrElse(pair.mainFile), entityType, attributes)
}
Expand All @@ -424,11 +424,9 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,

// transform the entities into a TSV
val rows = TSVFormatter.makeEntityRows(entityType, entities, entityHeaders)
val rowString = rows.map { _.mkString("\t")}.mkString("\n") + "\n"
val rowString = rows.map(_.mkString("\t")).mkString("\n") + "\n"

headerString + rowString
}
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,16 @@ import scala.util.matching.Regex
// *******************************************************************************************************************

case class PairPattern(
mainFile: Regex,
pairedFile: String => Regex
)
mainFile: Regex,
pairedFile: String => Regex
)

case class PairMatch(
mainFile: String,
matchedFile: Option[String],
baseName: Option[String],
id: Option[String]
)

mainFile: String,
matchedFile: Option[String],
baseName: Option[String],
id: Option[String]
)

class FileMatcher extends LazyLogging {

Expand All @@ -37,9 +36,8 @@ class FileMatcher extends LazyLogging {
pairNextFile(files, List())
}


@tailrec
private def pairNextFile(remainingFileList: List[String], pairsFound: List[PairMatch]): List[PairMatch] = {
private def pairNextFile(remainingFileList: List[String], pairsFound: List[PairMatch]): List[PairMatch] =
remainingFileList match {
case Nil =>
// no files left to match. Just return what we have found so far.
Expand All @@ -54,7 +52,6 @@ class FileMatcher extends LazyLogging {
}

}
}

private def tryToMatch(mainFile: String, remainingFileList: List[String]): PairMatch = {
// does the current file hit on any of our file-matching patterns?
Expand Down Expand Up @@ -90,5 +87,4 @@ class FileMatcher extends LazyLogging {
}
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import akka.http.scaladsl.client.RequestBuilding
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport
import akka.http.scaladsl.model.{ContentType, HttpCharsets, MediaTypes}
import akka.http.scaladsl.model.StatusCodes.OK
import akka.http.scaladsl.model.headers.{ContentDispositionTypes, `Content-Disposition`, `Content-Type`}
import akka.http.scaladsl.model.headers.{`Content-Disposition`, `Content-Type`, ContentDispositionTypes}
import akka.http.scaladsl.server.{Directives, Route}
import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.lang3.StringUtils
Expand Down Expand Up @@ -64,37 +64,48 @@ trait ExportEntitiesApiService
}
}
}
}
} ~
// *******************************************************************************************************************
// POC of file-matching for AJ-2025
// *******************************************************************************************************************
// TODO: add swagger definition
path( "api" / "workspaces" / Segment / Segment / "entities" / Segment / "tsv" / "frombucket") { (workspaceNamespace, workspaceName, entityType) =>
requireUserInfo() { userInfo =>
post {
import ExportEntitiesByTypeActor._
entity(as[FileMatchingOptions]) { matchingOptions =>
val attributeNames = None
val model = None
val exportArgs = ExportEntitiesByTypeArguments(userInfo, workspaceNamespace, workspaceName, entityType, attributeNames, model)
// *******************************************************************************************************************
// POC of file-matching for AJ-2025
// *******************************************************************************************************************
// TODO: add swagger definition
path("api" / "workspaces" / Segment / Segment / "entities" / Segment / "tsv" / "frombucket") {
(workspaceNamespace, workspaceName, entityType) =>
requireUserInfo() { userInfo =>
post {
import ExportEntitiesByTypeActor._
entity(as[FileMatchingOptions]) { matchingOptions =>
val attributeNames = None
val model = None
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
workspaceName,
entityType,
attributeNames,
model
)

complete {
exportEntitiesByTypeConstructor(exportArgs).matchBucketFiles(matchingOptions) map { pairs =>
// download the TSV as an attachment:
RequestCompleteWithHeaders((OK, pairs),
`Content-Type`.apply(ContentType.apply(MediaTypes.`text/tab-separated-values`, HttpCharsets.`UTF-8`)),
`Content-Disposition`.apply(ContentDispositionTypes.attachment, Map("filename" -> "filematching.tsv"))
)
complete {
exportEntitiesByTypeConstructor(exportArgs).matchBucketFiles(matchingOptions) map { pairs =>
// download the TSV as an attachment:
RequestCompleteWithHeaders(
(OK, pairs),
`Content-Type`.apply(
ContentType.apply(MediaTypes.`text/tab-separated-values`, HttpCharsets.`UTF-8`)
),
`Content-Disposition`.apply(ContentDispositionTypes.attachment,
Map("filename" -> "filematching.tsv")
)
)

// for easy debugging: output the TSV as text
// RequestComplete(OK, pairs)
// for easy debugging: output the TSV as text
// RequestComplete(OK, pairs)
}
}
}
}
}
}
}
}
}
// *******************************************************************************************************************
// POC of file-matching for AJ-2025
// *******************************************************************************************************************
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,12 @@ class FileMatcherSpec extends AnyFreeSpec with Matchers {
actual shouldBe expected
}
"should return results when some inputs dont hit the regex at all" in {
val input = List("Sample1_01.fastq.gz", "Sample2_01.fastq.gz", "Sample1_02.fastq.gz", "anotherfile.txt",
"my-cat-picture.jpg")
val input = List("Sample1_01.fastq.gz",
"Sample2_01.fastq.gz",
"Sample1_02.fastq.gz",
"anotherfile.txt",
"my-cat-picture.jpg"
)

val expected = List(
PairMatch("Sample1_01.fastq.gz", Option("Sample1_02.fastq.gz"), Option("sample"), Option("1")),
Expand Down

0 comments on commit a8e2f71

Please sign in to comment.