Skip to content

Commit

Permalink
set limit on number of files the API will process
Browse files Browse the repository at this point in the history
  • Loading branch information
davidangb committed Nov 5, 2024
1 parent b0fc0f4 commit 9505b6e
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,7 @@ googlecloud {
"153601": 0.045
}
}

firecloud {
max-filematching-bucket-files = 25000
}
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ object FireCloudConfig {
lazy val supportDomain = firecloud.getString("supportDomain")
lazy val supportPrefix = firecloud.getString("supportPrefix")
lazy val userAdminAccount = firecloud.getString("userAdminAccount")
lazy val maxFileMatchingFileCount = firecloud.getInt("max-filematching-bucket-files")
}

object Shibboleth {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
case None => ModelSchemaRegistry.getModelForSchemaType(SchemaTypes.FIRECLOUD)
}

// maximum allowed count of files in a bucket for the file-matching API
private val maxFileMatchingFileCount = FireCloudConfig.FireCloud.maxFileMatchingFileCount

def ExportEntities = streamEntities()

/**
Expand Down Expand Up @@ -412,6 +415,12 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
val fileList: List[GcsObjectName] =
googleServicesDao.listBucket(workspaceBucket, Option(matchingOptions.prefix), recursive)

// sanity check
if (fileList.length > maxFileMatchingFileCount) {
throw new FireCloudExceptionWithErrorReport(errorReport =
ErrorReport(StatusCodes.BadRequest, s"Too many files in bucket (${fileList.length}); cannot continue.")
)
}
logger.info(s"found ${fileList.length} files")

// transform the list of GcsObjectName to a list of java.nio.Path
Expand Down
4 changes: 4 additions & 0 deletions src/test/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,7 @@ spray.can.host-connector {
notification {
fullyQualifiedNotificationTopic = "dummy"
}

firecloud {
max-filematching-bucket-files = 25000
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ import akka.http.scaladsl.testkit.RouteTestTimeout
import akka.http.scaladsl.unmarshalling.Unmarshal
import better.files.File
import org.apache.commons.lang3.StringUtils
import org.broadinstitute.dsde.firecloud.FireCloudConfig
import org.broadinstitute.dsde.firecloud.dataaccess.MockRawlsDAO
import org.broadinstitute.dsde.firecloud.filematch.FileMatchingOptions
import org.broadinstitute.dsde.firecloud.filematch.FileMatchingOptionsFormat.fileMatchingOptionsFormat
import org.broadinstitute.dsde.firecloud.mock.MockGoogleServicesDAO
import org.broadinstitute.dsde.firecloud.model._
import org.broadinstitute.dsde.firecloud.webservice.{CookieAuthedApiService, ExportEntitiesApiService}
import org.broadinstitute.dsde.rawls.model.ErrorReport
import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport.ErrorReportFormat
import org.broadinstitute.dsde.workbench.model.google.{GcsBucketName, GcsObjectName}
import org.mockito.ArgumentMatchers
import org.mockito.ArgumentMatchers.any
Expand Down Expand Up @@ -403,6 +406,27 @@ class ExportEntitiesByTypeServiceSpec
responseAs[String] shouldBe expected
}
}

s"should throw an error if the bucket contains too many files" in {
val configuredMax = FireCloudConfig.FireCloud.maxFileMatchingFileCount
val bucketListResponse: List[GcsObjectName] =
Range.apply(0, configuredMax + 1).map(idx => GcsObjectName(s"file$idx")).toList

when(mockitoGoogleServicesDao.listBucket(any(), any(), any())).thenReturn(bucketListResponse)
val fileMatchingOptions = FileMatchingOptions("prefix")
Post("/api/workspaces/broad-dsde-dev/valid/entities/my-entity-type/paired-tsv",
fileMatchingOptions
) ~> dummyUserIdHeaders("1234") ~> sealRoute(
exportEntitiesRoutes
) ~> check {
handled should be(true)
status should be(BadRequest)
contentType shouldEqual ContentTypes.`application/json`
val actualError = responseAs[ErrorReport]
actualError.message should startWith("Too many files in bucket")
}
}

}

val validCookieFireCloudEntitiesLargeSampleTSVPath =
Expand Down

0 comments on commit 9505b6e

Please sign in to comment.