generated from c17hawke/dvc-project-template
-
Notifications
You must be signed in to change notification settings - Fork 2
/
dvc.yaml
71 lines (65 loc) · 2.26 KB
/
dvc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# DVC pipeline definition.
# Every entry under `stages` names one pipeline step:
#   cmd    - command DVC runs for the stage
#   params - keys read from params.py that the stage is tracked against
#   deps   - files whose changes invalidate the stage
#   outs   - files/directories the stage produces
# The stages chain through their artifacts: each stage lists an output of the
# previous stage among its deps.
stages:
  # Stage 1: runs the ingestion script and produces the validated database
  # file plus the bad-data archive directory.
  DATA_INGESTION:
    cmd: python src/stage_01_data_ingestion.py
    params:
      - params.py:
          - INPUT_FILE_NAME_REGEX
    deps:
      - src/stage_01_data_ingestion.py
      - src/utils/common.py
      - src/data_processing/data_validation.py
      - src/data_processing/data_transformation.py
      - src/data_processing/load_data.py
      - src/db_operation/db_operations.py
      - configs/config.py
    outs:
      - artifacts/sql_data_dir/Insurance_fraud_validated_data_db.db
      # - artifacts/local_data_dir/valid_data/model_training_validated_data_file.csv
      - artifacts/local_data_dir/Training_archive_bad_data

  # Stage 2: depends on the database file written by DATA_INGESTION and
  # produces the processed training CSV.
  DATA_PROCESSING:
    cmd: python src/stage_02_data_processing.py
    params:
      - params.py:
          - MAPPING_CATEGORICAL_COLUMNS
          - COLUMNS_TO_IGNORE_FOR_MODEL_TRAINING
    deps:
      - src/stage_02_data_processing.py
      - src/utils/common.py
      - configs/config.py
      - src/data_processing/data_preprocessing.py
      - src/db_operation/db_operations.py
      - artifacts/sql_data_dir/Insurance_fraud_validated_data_db.db
    outs:
      - artifacts/local_data_dir/processed_data/model_training_processed_data_file.csv

  # Stage 3: depends on the processed CSV from DATA_PROCESSING and produces
  # the clustered CSV, an elbow plot image and a k-means model file.
  DATA_CLUSTERING:
    cmd: python src/stage_03_data_clustering.py
    params:
      - params.py:
          - TARGET_COLUMN_NAME
          - CLUSTER_COLUMN_NAME
    deps:
      - src/stage_03_data_clustering.py
      - src/utils/common.py
      - configs/config.py
      - src/data_processing/data_clustering.py
      - artifacts/local_data_dir/processed_data/model_training_processed_data_file.csv
    outs:
      - artifacts/local_data_dir/clustered_data/model_training_clustered_data_file.csv
      - artifacts/graphs_dir/elbow_plot_cluster.png
      - artifacts/model_dir/data_clustering_model/k_means_model.h5

  # Other stages...

  # Stage 4: depends on the clustered CSV from DATA_CLUSTERING and writes its
  # results into the prediction model directory.
  MODEL_TRAINING:
    cmd: python src/stage_04_model_training.py
    params:
      - params.py:
          - XGBOOST_HYPER_PARAMS
          - SVM_HYPER_PARAMS
    deps:
      # NOTE(review): the stage 3 script is listed as a dependency of stage 4
      # although the clustered CSV below already links the two stages -
      # confirm this is intentional and not a copy-paste leftover.
      - src/stage_03_data_clustering.py
      - src/stage_04_model_training.py
      - src/utils/common.py
      - configs/config.py
      - src/model_training/model_training.py
      - artifacts/local_data_dir/clustered_data/model_training_clustered_data_file.csv
    outs:
      - artifacts/model_dir/prediction_model_dir/