From a0be99720cce840688fd701c664d0f50fd5504af Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Wed, 7 Feb 2024 13:29:42 +0100
Subject: [PATCH 1/2] fix: handle missing files better

This fixes the case where samples listed in units.tsv are missing from samples.tsv:
such units are now skipped when building the file list. It also handles the case
where no output files match an output definition by logging a warning instead of
raising an error.
---
 workflow/rules/common_references.smk | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/workflow/rules/common_references.smk b/workflow/rules/common_references.smk
index 426a6f1a..d925ac21 100644
--- a/workflow/rules/common_references.smk
+++ b/workflow/rules/common_references.smk
@@ -5,6 +5,7 @@ __license__ = "GPL-3"
 
 import pandas as pd
 import yaml
+import logging
 from snakemake.utils import validate
 from snakemake.utils import min_version
 
@@ -13,6 +14,8 @@ from hydra_genetics.utils.resources import load_resources
 from hydra_genetics.utils.samples import *
 from hydra_genetics.utils.units import *
 
+log = logging.getLogger()
+
 min_version("7.18.0")
 
 ### Set and validate config file
@@ -142,10 +145,14 @@ def get_cnvkit_antitarget(units: pandas.DataFrame, name: str) -> typing.List[str
 
 def get_files(units: pandas.DataFrame, name: str, string_path: str):
     types = []
+    sample_list = get_samples(samples)
     for i in output_spec["files"]:
         if i["name"] == name:
             types = i["types"]
-    data = [string_path % (t.sample, t.type) for t in units[units["type"].isin(types)].itertuples()]
+    data = [
+        string_path % (t.sample, t.type) for t in units[units["type"].isin(types)].itertuples()
+        if t.sample in sample_list
+    ]
     if not data:
-        raise Exception(f"Couldn't create file list using name: {name}, {string_path}")
+        log.warning(f"No files matching the output files found for rules using name: {name}, {string_path}")
     return set(data)

From 7d7c904f4a4f4845ee405d0732e710e81458fee9 Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Wed, 7 Feb 2024 13:33:46 +0100
Subject: [PATCH 2/2] tests: snakefmt

---
 workflow/rules/common_references.smk | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/workflow/rules/common_references.smk b/workflow/rules/common_references.smk
index d925ac21..d3024855 100644
--- a/workflow/rules/common_references.smk
+++ b/workflow/rules/common_references.smk
@@ -149,10 +149,7 @@ def get_files(units: pandas.DataFrame, name: str, string_path: str):
     for i in output_spec["files"]:
         if i["name"] == name:
             types = i["types"]
-    data = [
-        string_path % (t.sample, t.type) for t in units[units["type"].isin(types)].itertuples()
-        if t.sample in sample_list
-    ]
+    data = [string_path % (t.sample, t.type) for t in units[units["type"].isin(types)].itertuples() if t.sample in sample_list]
     if not data:
         log.warning(f"No files matching the output files found for rules using name: {name}, {string_path}")
     return set(data)