diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java index 1238271c791e..c4a5e88ba9b4 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java @@ -3194,14 +3194,34 @@ public Write withMaxFilesPerBundle(int maxFilesPerBundle) { return toBuilder().setMaxFilesPerBundle(maxFilesPerBundle).build(); } - @VisibleForTesting - Write withMaxFileSize(long maxFileSize) { + /** + * Controls the maximum byte size per file to be loaded into BigQuery. If the amount of data + * written to one file reaches this threshold, we will close that file and continue writing in a + * new file. + * + *

<p>The default value (4 TiB) respects BigQuery's maximum size per load job. + * + * @see <a href="https://cloud.google.com/bigquery/quotas#load_jobs">BigQuery Load Job + * Limits</a> + */ + public Write withMaxFileSize(long maxFileSize) { checkArgument(maxFileSize > 0, "maxFileSize must be > 0, but was: %s", maxFileSize); return toBuilder().setMaxFileSize(maxFileSize).build(); } - @VisibleForTesting - Write withMaxFilesPerPartition(int maxFilesPerPartition) { + /** + * Controls how many files will be assigned to a single BigQuery load job. If the number of + * files increases past this threshold, we will spill it over into multiple load jobs as + * necessary. + *

<p>The default value (10,000 files) respects BigQuery's maximum number of source URIs per job + * configuration. + * + * @see <a href="https://cloud.google.com/bigquery/quotas#load_jobs">BigQuery Load Job + * Limits</a> + */ + public Write withMaxFilesPerPartition(int maxFilesPerPartition) { checkArgument( maxFilesPerPartition > 0, "maxFilesPerPartition must be > 0, but was: %s",