Skip to content

Commit

Permalink
Add data_checkpointing.
Browse files Browse the repository at this point in the history
Signed-off-by: Revital Sur <[email protected]>
  • Loading branch information
revit13 committed Oct 13, 2024
1 parent 73434e7 commit c9fa99b
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion kfp/pipeline_generator/single-pipeline/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Steps to generate a new pipeline
- create a `pipeline_definitions.yaml` file for the required task (similar to the example [pipeline_definitions.yaml for the noop task](../../../transforms/universal/noop/kfp_ray/pipeline_definitions.yaml)).
- execute `make -C ../../../transforms workflow-venv` from this directory
- execute `source ../../../transforms/venv/bin/activate`
- execute `source ../../../transforms/venv/bin/activate && pip install pre_commit`
- execute `./run.sh --config_file <pipeline_definitions_file_path> --output_dir_file <destination directory>`, where `pipeline_definitions_file_path` is the path of the `pipeline_definitions.yaml` file that defines the pipeline and `destination directory` is the directory where the new pipeline file
will be generated.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def compute_exec_params_func(
data_s3_config: str,
data_max_files: int,
data_num_samples: int,
data_checkpointing: bool,
runtime_pipeline_id: str,
runtime_job_id: str,
runtime_code_location: dict,
Expand All @@ -50,6 +51,7 @@ def compute_exec_params_func(
"data_s3_config": data_s3_config,
"data_max_files": data_max_files,
"data_num_samples": data_num_samples,
"data_checkpointing": data_checkpointing,
"runtime_num_workers": KFPUtils.default_compute_execution_params(str(worker_options), str(actor_options)),
"runtime_worker_options": str(actor_options),
"runtime_pipeline_id": runtime_pipeline_id,
Expand Down Expand Up @@ -177,6 +179,7 @@ def {{ pipeline_name }}(
data_s3_config=data_s3_config,
data_max_files=data_max_files,
data_num_samples=data_num_samples,
data_checkpointing=data_checkpointing,
runtime_pipeline_id=runtime_pipeline_id,
runtime_job_id=run_id,
runtime_code_location=runtime_code_location,
Expand Down
3 changes: 3 additions & 0 deletions transforms/language/html2parquet/kfp_ray/html2parquet_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def compute_exec_params_func(
data_s3_config: str,
data_max_files: int,
data_num_samples: int,
data_checkpointing: bool,
runtime_pipeline_id: str,
runtime_job_id: str,
runtime_code_location: dict,
Expand All @@ -48,6 +49,7 @@ def compute_exec_params_func(
"data_s3_config": data_s3_config,
"data_max_files": data_max_files,
"data_num_samples": data_num_samples,
"data_checkpointing": data_checkpointing,
"runtime_num_workers": KFPUtils.default_compute_execution_params(str(worker_options), str(actor_options)),
"runtime_worker_options": str(actor_options),
"runtime_pipeline_id": runtime_pipeline_id,
Expand Down Expand Up @@ -177,6 +179,7 @@ def html2parquet(
data_s3_config=data_s3_config,
data_max_files=data_max_files,
data_num_samples=data_num_samples,
data_checkpointing=data_checkpointing,
runtime_pipeline_id=runtime_pipeline_id,
runtime_job_id=run_id,
runtime_code_location=runtime_code_location,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pipeline_common_input_parameters_values:
kfp_base_image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
transform_image: "quay.io/dataprep1/data-prep-kit/html2parquet-ray:latest"
s3_access_secret: "s3-secret"
image_pull_secret: ""
input_folder: "test/html2parquet/input/"
output_folder: "test/html2parquet/output/"

Expand Down

0 comments on commit c9fa99b

Please sign in to comment.