changed to handle any number of datasets using their names

bcm-uga · Nov 22, 2024 · f11b5d0 · f11b5d0
1 parent 4906085
commit f11b5d0
Show file tree

Hide file tree

Showing 20 changed files with 1,319 additions and 628 deletions.
diff --git a/README.md b/README.md
@@ -57,6 +57,7 @@ sudo docker login -u  hombergn
 #upload on dockerhub
 sudo docker push hombergn/hadaca3_light:latest
 sudo docker push hombergn/hadaca3_pyr:latest
+sudo docker push hombergn/hadaca3_final:latest
 
 #Single command to build and push. 
 sudo docker build -t hombergn/hadaca3_pyr .  && sudo docker push hombergn/hadaca3_pyr:latest

diff --git a/phase-0-smoothie/bundle/overview.md b/phase-0-smoothie/bundle/overview.md
@@ -7,8 +7,6 @@ The HADACA challenge is a recurring event aimed at developping deconvolution met
 - The **first edition** took place in 2018, in collaboration with the Data Institute at University Grenoble-Alpes.
 - The **second edition** was held in 2019, in partnership with the Ligue contre le Cancer and sponsored by EIT Health.
 - The **third edition** is set for December 2024, in collaboration with the M4DI project, part of the PEPR Santé Numérique. Visit the official website for details: http://hadaca3.sciencesconf.org.
-
-
 
 **Aim of the challenge**     
 

diff --git a/phase-0-smoothie/bundle/terms.md b/phase-0-smoothie/bundle/terms.md
@@ -2,9 +2,6 @@
                            
 By participating to this challenge, you accept to publicly share your submissions.
 
-You may submit 5 submissions every day and 100 in total.
+You may submit 50 submissions every day and 500 in total.
 
 This challenge is governed by the general [ChaLearn contest rules](https://www.causality.inf.ethz.ch/GeneralChalearnContestRuleTerms.html).
-
-
-
diff --git a/phase-0-smoothie/generate_bundle_smoothie.sh b/phase-0-smoothie/generate_bundle_smoothie.sh
@@ -32,16 +32,14 @@ cp "$path_data"reference_fruits.rds input_data/reference_fruits.rds
 mkdir starting_kit
 rm -rf starting_kit/*
 cp -r input_data/* starting_kit/
-# sh generate_data.sh $1
-# sh generate_data.sh real
-echo 'data generated'
+
 
 # Zip folder 
 echo "create bunlde.zip"
 zip -FS -j -r  bundle/scoring_program.zip scoring_program/
 zip -FS -j -r  bundle/ingestion_program.zip ingestion_program/
 
-#Phase_0 is useless for now 
+# Without argument the script "generate_baselines.R" will create the Phase 0 baselines
 Rscript ~/projects/hadaca3/templates/generate_baselines.R Phase_0  
 cp ~/projects/hadaca3/templates/tmp/* starting_kit/
 rm -r ~/projects/hadaca3/templates/tmp/ 

diff --git a/phase-1_2_3/automated_docker_test.sh b/phase-1_2_3/automated_docker_test.sh
@@ -12,7 +12,8 @@ type Rscript >/dev/null 2>&1 || { echo >&2 "Rscript requiered but it's not insta
 # echo "Docker created"
 
 # docker_name=hombergn/hadaca3_light
-docker_name=hombergn/hadaca3_pyr
+# docker_name=hombergn/hadaca3_pyr
+docker_name=hombergn/hadaca3_final
 
 echo "Generate data"
 sh generate_data.sh $1
@@ -22,7 +23,8 @@ echo "data Generated"
 echo "Create submission program"
 cd starting_kit/
 rm -rf submissions
-Rscript submission_script.R >> logs
+# Rscript submission_script.R >> logs
+Rscript submission_script.R 
 # python submission_script.py
 cd - 
 echo "Done"

diff --git a/phase-1_2_3/bundle/competition.yaml b/phase-1_2_3/bundle/competition.yaml
@@ -45,7 +45,7 @@ phases:
   description: good luck 
   start: '2024-03-20 '
   end: '2024-12-20 11:00'
-  max_submissions_per_day: 5
+  max_submissions_per_day: 20
   max_submissions: 100
   execution_time_limit: 60000
   auto_migrate_to_this_phase: false
@@ -58,7 +58,7 @@ phases:
   description: good luck
   start: '2024-12-20 11:01'
   # end: '2024-10-20T00:10'
-  max_submissions_per_day: 5
+  max_submissions_per_day: 15
   max_submissions: 100
   execution_time_limit: 60000
   auto_migrate_to_this_phase: true
@@ -113,28 +113,28 @@ leaderboards:
     key: median_performance
     index: 0
     sorting: desc
-  - title: overall performance dataset 1
-    key: Accuracy_mean_1
-    index: 1
-    sorting: desc
-    hidden: false  
-  - title: overall performance dataset 2
-    key: Accuracy_mean_2
-    index: 2
-    sorting: desc
-    hidden: false
-  - title: overall performance dataset 3
-    key: Accuracy_mean_3
-    index: 3
-    sorting: desc
-    hidden: false
-  - title: overall performance dataset 4
-    key: Accuracy_mean_4
-    index: 4
-    sorting: desc
-    hidden: false
-  - title: Excecution Time global
-    key: Time
-    index: 5
-    sorting: desc
-    hidden: false
+  # - title: overall performance dataset 1
+  #   key: Accuracy_mean_1
+  #   index: 1
+  #   sorting: desc
+  #   hidden: false  
+  # - title: overall performance dataset 2
+  #   key: Accuracy_mean_2
+  #   index: 2
+  #   sorting: desc
+  #   hidden: false
+  # - title: overall performance dataset 3
+  #   key: Accuracy_mean_3
+  #   index: 3
+  #   sorting: desc
+  #   hidden: false
+  # - title: overall performance dataset 4
+  #   key: Accuracy_mean_4
+  #   index: 4
+  #   sorting: desc
+  #   hidden: false
+  # - title: Excecution Time global
+  #   key: Time
+  #   index: 5
+  #   sorting: desc
+  #   hidden: false
diff --git a/phase-1_2_3/bundle/terms.md b/phase-1_2_3/bundle/terms.md
@@ -1,6 +1,6 @@
 By participating to this challenge, you accept to publicly share your submissions.
 
-You may submit 5 submissions every day and 100 in total.
+You may submit 50 submissions every day and 500 in total.
 
 This challenge is governed by the general [ChaLearn contest rules](https://www.causality.inf.ethz.ch/GeneralChalearnContestRuleTerms.html).
 

diff --git a/phase-1_2_3/generate_bundle.sh b/phase-1_2_3/generate_bundle.sh
@@ -3,30 +3,27 @@ type Rscript >/dev/null 2>&1 || { echo >&2 "Rscript requiered but it's not insta
 
 
 echo "generate data"
-sh generate_data.sh $1
-# sh generate_data.sh real
+# sh generate_data.sh $1
+sh generate_data.sh real
 echo 'data generated'
 
 # Zip folder 
 echo "create bunlde.zip"
 zip -FS -j -r  bundle/scoring_program.zip scoring_program/
 zip -FS -j -r  bundle/ingestion_program.zip ingestion_program/
-cd starting_kit/ ; zip  -FS  -r  ../bundle/starting_kit_phase2-3.zip *  -x \*submissions\* ; cd .. ; 
-
 
-zip -FS -j -r  bundle/ground_truth.zip ground_truth/
-zip -FS -j -r  bundle/ground_truth_final.zip ground_truth_final/
 
+#  generate baselines :
+Rscript ~/projects/hadaca3/templates/generate_baselines.R Phase_1  
+cp ~/projects/hadaca3/templates/tmp/* starting_kit/
 
+rm -r ~/projects/hadaca3/templates/tmp/ 
+cd starting_kit/ ; zip  -FS  -r  ../bundle/starting_kit_phase2-3.zip *  -x \*submissions\* ; cd .. ; 
 
 
-# cd input_data/
-# zip -FS -r  ../bundle/input_data.zip *
-# cd .. 
+zip -FS -j -r  bundle/ground_truth.zip ground_truth/
+zip -FS -j -r  bundle/ground_truth_final.zip ground_truth_final/
 
-# cd input_data_final/
-# zip -FS -r  ../bundle/input_data_final.zip *
-# cd .. 
 
 zip -FS -r -j bundle/input_data.zip input_data/
 zip -FS -r -j bundle/input_data_final.zip input_data_final/

diff --git a/phase-1_2_3/generate_data.sh b/phase-1_2_3/generate_data.sh
@@ -7,9 +7,6 @@ echo cean data first
 sh clean.sh
 echo cleaning done. 
 
-nb_datasets=4
-
-
 
 mkdir input_data
 mkdir input_data_final
@@ -22,8 +19,10 @@ mkdir data
 
 #Test if argument exist
 
-if [ $# -lt 1 ]
+# if [ $# -lt 1 ]
+if [false]
 then
+nb_datasets=9
 echo "Creating fake data because there is no argument"
 
 Rscript generate_fake_data.R  $((nb_datasets )) 
@@ -37,8 +36,22 @@ echo "At least one argument exists, migrating real data. \n
 This script will only copy files from the project hadaca3_private, so make sure they exist and they are up to date."  
 echo $1
 
+# h3_p_path="~/projects/hadaca3_private/"
+# cur_path=$PWD
+
+# cd $h3_p_path
+# cmd_snakemake_clean="snakemake --cores 8 -s 00_run_pipeline.py -p clean"
+# # cmd_snakemake_build="snakemake --cores 8 -s 00_run_pipeline.py -p "
+# cmd_snakemake_build="snakemake --cores 4 -s 00_run_pipeline.py -p --resources mem_mb=16000 --max-jobs-per-second 1 generate_data"
+
+# $cmd_snakemake_clean 
+# $cmd_snakemake_build 
+
 ## This script anonymise the datasets and move them to the folder data. 
-Rscript prepare_real_data.R  $((nb_datasets )) 
+# Rscript prepare_real_data.R  $((nb_datasets )) 
+
+
+Rscript prepare_real_data.R 
 
 path_data="data/"
 
@@ -51,25 +64,45 @@ fi
 
 cp "$path_data"reference_data/reference_pdac.rds input_data/reference_pdac.rds
 cp "$path_data"reference_data/reference_pdac.rds input_data_final/reference_pdac.rds
+rm -rf "$path_data"reference_data/ 
 
 
-for i in  $(seq 1 $nb_datasets);
+for dir in "$path_data"*1/     # list directories in the form "/tmp/dirname/"
 do
-    echo "Number $i"
-
-    cp "$path_data""$i"/mixes_data_"$i".rds input_data/
-    cp "$path_data""$i"/ground_truth_"$i".rds ground_truth/
-
+    dir=${dir%*/}      # remove the trailing "/"
+    # echo "${dir##*/}"    # print everything after the final "/"
+    # echo $dir
+    cp "$dir"/mixes* input_data/
+    cp "$dir"/groundtruth* ground_truth/
 done
 
-for i in  $(seq $((nb_datasets+1)) $((nb_datasets * 2)));
+for dir in "$path_data"*2/     # list directories in the form "/tmp/dirname/"
 do
-    echo "Number $i"
-    num=$((i -nb_datasets ))
-    cp "$path_data""$i"/mixes_data_"$i".rds input_data_final/mixes_data_"$num".rds
-    cp "$path_data""$i"/ground_truth_"$i".rds ground_truth_final/ground_truth_"$num".rds
+    dir=${dir%*/}      # remove the trailing "/"
+    # echo "${dir##*/}"    # print everything after the final "/"
+    # echo $dir
+    cp "$dir"/mixes* input_data_final/
+    cp "$dir"/groundtruth* ground_truth_final/
 done
 
+
+# for i in  $(seq 1 $nb_datasets);
+# do
+#     echo "Number $i"
+
+#     cp "$path_data""$i"/mixes_data_"$i".rds input_data/
+#     cp "$path_data""$i"/ground_truth_"$i".rds ground_truth/
+
+# done
+
+# for i in  $(seq $((nb_datasets+1)) $((nb_datasets * 2)));
+# do
+#     echo "Number $i"
+#     num=$((i -nb_datasets ))
+#     cp "$path_data""$i"/mixes_data_"$i".rds input_data_final/mixes_data_"$num".rds
+#     cp "$path_data""$i"/ground_truth_"$i".rds ground_truth_final/ground_truth_"$num".rds
+# done
+
 mkdir starting_kit/data/
 cp -r input_data/* starting_kit/data/
 

diff --git a/phase-1_2_3/ingestion_program/ingestion.R b/phase-1_2_3/ingestion_program/ingestion.R
@@ -45,28 +45,7 @@ print(x = list.files(path = submission_program, all.files = TRUE, full.names = T
 print(x = "")
 
 ## output files
-# output_program       <- paste0(output, .Platform$file.sep, "output_program.txt")
-# output_profiling     <- paste0(output, .Platform$file.sep, "Rprof.out"         )
 output_profiling_rds <- paste0(output, .Platform$file.sep, "Rprof.rds"         )
-#output_dataType      <- paste0(output, .Platform$file.sep, "dataType.rds"      )
-# file.create(output_program, output_profiling)
-
-## diverting R output to a text file :
-## sink(file = output_program, append = FALSE)
-
-
-# Rprof(
-#     filename         = output_profiling
-#   , append           = FALSE
-#   , interval         = 0.02
-#   , memory.profiling = TRUE
-#   , gc.profiling     = FALSE
-#   , line.profiling   = FALSE
-# )
-# start_time <- proc.time()
-
-
-# Rprof(output_profiling,interval = 0.02)
 
 
 #Check it is a result submission or a program submission