Update terraform modules via plugin #1976

Draft
wants to merge 1 commit into main
12 changes: 6 additions & 6 deletions v1/terraform/Cloud_BigQuery_to_Cloud_Datastore/dataflow_job.tf
@@ -35,7 +35,7 @@ variable "region" {

variable "readQuery" {
type = string
description = "SQL query in standard SQL to pull data from BigQuery"
description = "A BigQuery SQL query that extracts data from the source. For example, select * from dataset1.sample_table."

}

@@ -53,7 +53,7 @@ variable "invalidOutputPath" {

variable "datastoreWriteProjectId" {
type = string
description = "The Google Cloud project ID of where to write Datastore entities"
description = "The ID of the Google Cloud project to write the Datastore entities to."

}

@@ -70,14 +70,14 @@ variable "datastoreWriteNamespace" {
}

variable "datastoreHintNumWorkers" {
type = string
description = "Hint for the expected number of workers in the Datastore ramp-up throttling step. Defaults to: 500."
type = number
description = "Hint for the expected number of workers in the Datastore ramp-up throttling step. Default is `500`."
default = null
}

variable "errorWritePath" {
type = string
description = "The error log output folder to use for write failures that occur during processing. (Example: gs://your-bucket/errors/)"
description = "The error log output file to use for write failures that occur during processing. (Example: gs://your-bucket/errors/)"

}

@@ -184,7 +184,7 @@ resource "google_dataflow_job" "generated" {
datastoreWriteProjectId = var.datastoreWriteProjectId
datastoreWriteEntityKind = var.datastoreWriteEntityKind
datastoreWriteNamespace = var.datastoreWriteNamespace
datastoreHintNumWorkers = var.datastoreHintNumWorkers
datastoreHintNumWorkers = tostring(var.datastoreHintNumWorkers)
errorWritePath = var.errorWritePath
}

@@ -35,7 +35,7 @@ variable "region" {

variable "readQuery" {
type = string
description = "SQL query in standard SQL to pull data from BigQuery"
description = "A BigQuery SQL query that extracts data from the source. For example, select * from dataset1.sample_table."

}

@@ -53,31 +53,31 @@ variable "invalidOutputPath" {

variable "outputDirectory" {
type = string
description = "Cloud Storage directory to store output TFRecord files. (Example: gs://your-bucket/your-path)"
description = "The top-level Cloud Storage path prefix to use when writing the training, testing, and validation TFRecord files. Subdirectories for resulting training, testing, and validation TFRecord files are automatically generated from `outputDirectory`. For example, `gs://mybucket/output/train` (Example: gs://mybucket/output)"

}

variable "outputSuffix" {
type = string
description = "File suffix to append to TFRecord files. Defaults to .tfrecord"
description = "The file suffix for the training, testing, and validation TFRecord files that are written. The default value is `.tfrecord`."
default = null
}

variable "trainingPercentage" {
type = string
description = "Defaults to 1 or 100%. Should be decimal between 0 and 1 inclusive"
type = number
description = "The percentage of query data allocated to training TFRecord files. The default value is 1, or 100%."
default = null
}

variable "testingPercentage" {
type = string
description = "Defaults to 0 or 0%. Should be decimal between 0 and 1 inclusive"
type = number
description = "The percentage of query data allocated to testing TFRecord files. The default value is 0, or 0%."
default = null
}

variable "validationPercentage" {
type = string
description = "Defaults to 0 or 0%. Should be decimal between 0 and 1 inclusive"
type = number
description = "The percentage of query data allocated to validation TFRecord files. The default value is 0, or 0%."
default = null
}

@@ -183,9 +183,9 @@ resource "google_dataflow_job" "generated" {
invalidOutputPath = var.invalidOutputPath
outputDirectory = var.outputDirectory
outputSuffix = var.outputSuffix
trainingPercentage = var.trainingPercentage
testingPercentage = var.testingPercentage
validationPercentage = var.validationPercentage
trainingPercentage = tostring(var.trainingPercentage)
testingPercentage = tostring(var.testingPercentage)
validationPercentage = tostring(var.validationPercentage)
}

additional_experiments = var.additional_experiments
29 changes: 17 additions & 12 deletions v1/terraform/Cloud_Bigtable_to_GCS_Avro/dataflow_job.tf
@@ -35,36 +35,40 @@ variable "region" {

variable "bigtableProjectId" {
type = string
description = "The ID of the Google Cloud project of the Cloud Bigtable instance that you want to read data from"
description = "The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from."

}

variable "bigtableInstanceId" {
type = string
description = "The ID of the Cloud Bigtable instance that contains the table"
description = "The ID of the Bigtable instance that contains the table."

}

variable "bigtableTableId" {
type = string
description = "The ID of the Cloud Bigtable table to read"
description = "The ID of the Bigtable table to export."

}

variable "outputDirectory" {
type = string
description = "The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/your-path)"
description = "The Cloud Storage path where data is written. (Example: gs://mybucket/somefolder)"

}

variable "filenamePrefix" {
type = string
description = <<EOT
The prefix of the Avro file name. For example, "table1-". Defaults to: part.
EOT
description = "The prefix of the Avro filename. For example, `output-`. Defaults to: part."
default = "part"
}

variable "bigtableAppProfileId" {
type = string
description = "The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile."
default = null
}


provider "google" {
project = var.project
@@ -162,11 +166,12 @@ resource "google_dataflow_job" "generated" {
provider = google
template_gcs_path = "gs://dataflow-templates-${var.region}/latest/Cloud_Bigtable_to_GCS_Avro"
parameters = {
bigtableProjectId = var.bigtableProjectId
bigtableInstanceId = var.bigtableInstanceId
bigtableTableId = var.bigtableTableId
outputDirectory = var.outputDirectory
filenamePrefix = var.filenamePrefix
bigtableProjectId = var.bigtableProjectId
bigtableInstanceId = var.bigtableInstanceId
bigtableTableId = var.bigtableTableId
outputDirectory = var.outputDirectory
filenamePrefix = var.filenamePrefix
bigtableAppProfileId = var.bigtableAppProfileId
}

additional_experiments = var.additional_experiments
39 changes: 23 additions & 16 deletions v1/terraform/Cloud_Bigtable_to_GCS_Json/dataflow_job.tf
@@ -35,45 +35,51 @@ variable "region" {

variable "bigtableProjectId" {
type = string
description = "The ID of the Google Cloud project of the Cloud Bigtable instance that you want to read data from"
description = "The ID for the Google Cloud project that contains the Bigtable instance that you want to read data from."

}

variable "bigtableInstanceId" {
type = string
description = "The ID of the Cloud Bigtable instance that contains the table"
description = "The ID of the Bigtable instance that contains the table."

}

variable "bigtableTableId" {
type = string
description = "The ID of the Cloud Bigtable table to read"
description = "The ID of the Bigtable table to read from."

}

variable "outputDirectory" {
type = string
description = "The Cloud Storage path where the output JSON files can be stored. (Example: gs://your-bucket/your-path/)"
default = null
description = "The Cloud Storage path where the output JSON files are stored. (Example: gs://your-bucket/your-path/)"

}

variable "filenamePrefix" {
type = string
description = <<EOT
The prefix of the JSON file name. For example, "table1-". Defaults to: part.
The prefix of the JSON file name. For example, "table1-". If no value is provided, defaults to `part`.
EOT
default = "part"
default = null
Contributor


it's strange that the default is now null, when the comment (and the code, if I remember correctly) have "part" as the default

Contributor Author


The generator is set up assuming either defaulted or optional, not defaulted and optional. I moved this PR to draft to fix this. Thanks for calling this out.
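
For context, here is a minimal sketch, with hypothetical variable names and not part of this PR, of the two shapes the generator distinguishes: a defaulted variable, where Terraform itself supplies a value when the caller omits one, and an optional variable, where Terraform leaves the value null and the template's built-in default is assumed to apply at launch time.

# Defaulted: the caller may omit the value; Terraform fills in "part" itself.
variable "filenamePrefix_defaulted" {
  type        = string
  description = "The prefix of the JSON file name. Defaults to: part."
  default     = "part"
}

# Optional: the caller may omit the value; Terraform keeps it null, and the
# template's own default is assumed to take effect downstream.
variable "filenamePrefix_optional" {
  type        = string
  description = "The prefix of the JSON file name. If no value is provided, defaults to `part`."
  default     = null
}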

}

variable "userOption" {
type = string
description = "User option: `FLATTEN` or `NONE`. `FLATTEN` flattens the row to the single level. `NONE` stores the whole row as a JSON string. Defaults to: NONE."
description = "Possible values are `FLATTEN` or `NONE`. `FLATTEN` flattens the row to the single level. `NONE` stores the whole row as a JSON string. Defaults to `NONE`."
default = null
}

variable "columnsAliases" {
type = string
description = "Comma separated list of columns which are required for Vertex AI Vector Search Index. The `id` & `embedding` are required columns for Vertex Vector Search. You can use the notation `fromfamily:fromcolumn;to`. For example, if the columns are `rowkey` and `cf:my_embedding`, in which `rowkey` and the embedding column is named differently, `cf:my_embedding;embedding` and `rowkey;id` should be specified. Only used when FLATTEN user option is specified."
description = "A comma-separated list of columns that are required for the Vertex AI Vector Search index. The columns `id` and `embedding` are required for Vertex AI Vector Search. You can use the notation `fromfamily:fromcolumn;to`. For example, if the columns are `rowkey` and `cf:my_embedding`, where `rowkey` has a different name than the embedding column, specify `cf:my_embedding;embedding` and, `rowkey;id`. Only use this option when the value for `userOption` is `FLATTEN`."
default = null
}

variable "bigtableAppProfileId" {
type = string
description = "The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile."
default = null
}

@@ -174,13 +180,14 @@ resource "google_dataflow_job" "generated" {
provider = google
template_gcs_path = "gs://dataflow-templates-${var.region}/latest/Cloud_Bigtable_to_GCS_Json"
parameters = {
bigtableProjectId = var.bigtableProjectId
bigtableInstanceId = var.bigtableInstanceId
bigtableTableId = var.bigtableTableId
outputDirectory = var.outputDirectory
filenamePrefix = var.filenamePrefix
userOption = var.userOption
columnsAliases = var.columnsAliases
bigtableProjectId = var.bigtableProjectId
bigtableInstanceId = var.bigtableInstanceId
bigtableTableId = var.bigtableTableId
outputDirectory = var.outputDirectory
filenamePrefix = var.filenamePrefix
userOption = var.userOption
columnsAliases = var.columnsAliases
bigtableAppProfileId = var.bigtableAppProfileId
}

additional_experiments = var.additional_experiments
29 changes: 18 additions & 11 deletions v1/terraform/Cloud_Bigtable_to_GCS_Parquet/dataflow_job.tf
@@ -35,25 +35,25 @@ variable "region" {

variable "bigtableProjectId" {
type = string
description = "The ID of the Google Cloud project of the Cloud Bigtable instance that you want to read data from"
description = "The ID of the Google Cloud project that contains the Cloud Bigtable instance that you want to read data from."

}

variable "bigtableInstanceId" {
type = string
description = "The ID of the Cloud Bigtable instance that contains the table"
description = "The ID of the Cloud Bigtable instance that contains the table."

}

variable "bigtableTableId" {
type = string
description = "The ID of the Cloud Bigtable table to export"
description = "The ID of the Cloud Bigtable table to export."

}

variable "outputDirectory" {
type = string
description = "The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/your-path)"
description = "The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse the directory path for date and time formatters. For example: gs://your-bucket/your-path."

}

@@ -67,7 +67,13 @@ EOT

variable "numShards" {
type = number
description = "The maximum number of output shards produced when writing. A higher number of shards means higher throughput for writing to Cloud Storage, but potentially higher data aggregation cost across shards when processing output Cloud Storage files. Default value is decided by Dataflow."
description = "The maximum number of output shards produced when writing. A higher number of shards means higher throughput for writing to Cloud Storage, but potentially higher data aggregation cost across shards when processing output Cloud Storage files. The default value is decided by Dataflow."
default = null
}

variable "bigtableAppProfileId" {
type = string
description = "The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile."
default = null
}

@@ -168,12 +174,13 @@ resource "google_dataflow_job" "generated" {
provider = google
template_gcs_path = "gs://dataflow-templates-${var.region}/latest/Cloud_Bigtable_to_GCS_Parquet"
parameters = {
bigtableProjectId = var.bigtableProjectId
bigtableInstanceId = var.bigtableInstanceId
bigtableTableId = var.bigtableTableId
outputDirectory = var.outputDirectory
filenamePrefix = var.filenamePrefix
numShards = tostring(var.numShards)
bigtableProjectId = var.bigtableProjectId
bigtableInstanceId = var.bigtableInstanceId
bigtableTableId = var.bigtableTableId
outputDirectory = var.outputDirectory
filenamePrefix = var.filenamePrefix
numShards = tostring(var.numShards)
bigtableAppProfileId = var.bigtableAppProfileId
}

additional_experiments = var.additional_experiments
14 changes: 7 additions & 7 deletions v1/terraform/Cloud_Bigtable_to_GCS_SequenceFile/dataflow_job.tf
@@ -35,25 +35,25 @@ variable "region" {

variable "bigtableProject" {
type = string
description = "The ID of the Google Cloud project of the Cloud Bigtable instance that you want to read data from. Defaults to job project."
description = "The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from."

}

variable "bigtableInstanceId" {
type = string
description = "The ID of the Cloud Bigtable instance that contains the table"
description = "The ID of the Bigtable instance that contains the table."

}

variable "bigtableTableId" {
type = string
description = "The ID of the Cloud Bigtable table to export"
description = "The ID of the Bigtable table to export."

}

variable "bigtableAppProfileId" {
type = string
description = "The ID of the Cloud Bigtable application profile to be used for the export"
description = "The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile."
default = null
}

@@ -83,14 +83,14 @@ variable "bigtableFilter" {

variable "destinationPath" {
type = string
description = "Cloud Storage path where data should be written. (Example: gs://your-bucket/your-path/)"
description = "The Cloud Storage path where data is written. (Example: gs://your-bucket/your-path/)"

}

variable "filenamePrefix" {
type = string
description = "The prefix for each shard in destinationPath. (Example: output-). Defaults to: part."
default = "part"
description = "The prefix of the SequenceFile filename. (Example: output-)"

}

