From a0be99720cce840688fd701c664d0f50fd5504af Mon Sep 17 00:00:00 2001 From: jonca79 <54137490+jonca79@users.noreply.github.com> Date: Wed, 7 Feb 2024 13:29:42 +0100 Subject: [PATCH 1/2] fix: handle missing files better This handles samples that are listed in units.tsv but missing from samples.tsv. It also handles the case where an output has no matching output files by logging a warning instead of raising an error. --- workflow/rules/common_references.smk | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/workflow/rules/common_references.smk b/workflow/rules/common_references.smk index 426a6f1a..d925ac21 100644 --- a/workflow/rules/common_references.smk +++ b/workflow/rules/common_references.smk @@ -5,6 +5,7 @@ __license__ = "GPL-3" import pandas as pd import yaml +import logging from snakemake.utils import validate from snakemake.utils import min_version @@ -13,6 +14,8 @@ from hydra_genetics.utils.resources import load_resources from hydra_genetics.utils.samples import * from hydra_genetics.utils.units import * +log = logging.getLogger() + min_version("7.18.0") ### Set and validate config file @@ -142,10 +145,14 @@ def get_cnvkit_antitarget(units: pandas.DataFrame, name: str) -> typing.List[str def get_files(units: pandas.DataFrame, name: str, string_path: str): types = [] + sample_list = get_samples(samples) for i in output_spec["files"]: if i["name"] == name: types = i["types"] - data = [string_path % (t.sample, t.type) for t in units[units["type"].isin(types)].itertuples()] + data = [ + string_path % (t.sample, t.type) for t in units[units["type"].isin(types)].itertuples() + if t.sample in sample_list + ] if not data: - raise Exception(f"Couldn't create file list using name: {name}, {string_path}") + log.warning(f"No files matching the output files found for rules using name: {name}, {string_path}") return set(data) From 7d7c904f4a4f4845ee405d0732e710e81458fee9 Mon Sep 17 00:00:00 2001 From: jonca79 <54137490+jonca79@users.noreply.github.com> Date: Wed, 7 
Feb 2024 13:33:46 +0100 Subject: [PATCH 2/2] tests: snakefmt --- workflow/rules/common_references.smk | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/workflow/rules/common_references.smk b/workflow/rules/common_references.smk index d925ac21..d3024855 100644 --- a/workflow/rules/common_references.smk +++ b/workflow/rules/common_references.smk @@ -149,10 +149,7 @@ def get_files(units: pandas.DataFrame, name: str, string_path: str): for i in output_spec["files"]: if i["name"] == name: types = i["types"] - data = [ - string_path % (t.sample, t.type) for t in units[units["type"].isin(types)].itertuples() - if t.sample in sample_list - ] + data = [string_path % (t.sample, t.type) for t in units[units["type"].isin(types)].itertuples() if t.sample in sample_list] if not data: log.warning(f"No files matching the output files found for rules using name: {name}, {string_path}") return set(data)