Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Count NTC reads in sequencing QC check #531

Merged
merged 3 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 30 additions & 10 deletions cg_lims/EPPs/qc/sequencing_quality_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@

from cg_lims.EPPs.qc.models import SampleLane, SampleLaneSet
from cg_lims.EPPs.qc.sequencing_artifact_manager import SequencingArtifactManager
from cg_lims.exceptions import MissingSampleError
from cg_lims.get.samples import is_negative_control
from cg_lims.models.sample_lane_sequencing_metrics import SampleLaneSequencingMetrics
from cg_lims.status_db_api import StatusDBAPI
from genologics.entities import Sample
from genologics.lims import Lims

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -39,15 +43,17 @@ def _get_sequencing_metrics(self) -> List[SampleLaneSequencingMetrics]:
self.metrics = metrics
return metrics

def validate_sequencing_quality(self) -> str:
def validate_sequencing_quality(self, lims: Lims) -> str:
"""Validate the sequencing data for each sample in all lanes on a flow cell based on the number of reads and q30 scores."""
LOG.info(f"Validating sequencing quality for flow cell {self.flow_cell_name}")

sequencing_metrics = self._get_sequencing_metrics()

for metrics in sequencing_metrics:
passed_qc: bool = self._quality_control(metrics)
self._update_sample_with_quality_results(metrics, passed_qc)
passed_qc: bool = self._quality_control(metrics=metrics, lims=lims)
self._update_sample_with_quality_results(
metrics=metrics, passed_quality_control=passed_qc
)

if not passed_qc:
self.failed_qc_count += 1
Expand All @@ -67,14 +73,28 @@ def _update_sample_with_quality_results(
passed_quality_control=passed_quality_control,
)

def _quality_control(self, metrics: SampleLaneSequencingMetrics) -> bool:
return self._passes_quality_thresholds(
reads=metrics.sample_total_reads_in_lane,
q30_score=metrics.sample_base_percentage_passing_q30,
)
def _quality_control(self, metrics: SampleLaneSequencingMetrics, lims: Lims) -> bool:
    """Decide whether one sample's metrics in one lane pass quality control.

    The sample is looked up in LIMS by its internal id to find out whether it is a
    negative control; that flag is forwarded to the threshold check together with
    the read count and Q30 percentage from the metrics.

    Args:
        metrics: Sequencing metrics for a single sample in a single lane.
        lims: LIMS connection used to look up the sample.

    Returns:
        The result of `_passes_quality_thresholds` for the sample.
    """
    # Only the LIMS lookup can raise MissingSampleError, so it is the only part
    # guarded. A sample that cannot be found is evaluated as a regular sample.
    try:
        sample: Sample = Sample(lims=lims, id=metrics.sample_internal_id)
        negative_control: bool = is_negative_control(sample=sample)
    except MissingSampleError:
        negative_control = False

    return self._passes_quality_thresholds(
        reads=metrics.sample_total_reads_in_lane,
        q30_score=metrics.sample_base_percentage_passing_q30,
        negative_control=negative_control,
    )

def _passes_quality_thresholds(self, q30_score: float, reads: int) -> bool:
"""Check if the provided metrics pass the minimum quality thresholds."""
def _passes_quality_thresholds(
self, q30_score: float, reads: int, negative_control: bool
) -> bool:
"""Check if the provided metrics pass the minimum quality thresholds. Negative controls always pass."""
if negative_control:
return True
passes_q30_threshold = q30_score >= self.q30_threshold
passes_read_threshold = reads >= self.READS_MIN_THRESHOLD
return passes_q30_threshold and passes_read_threshold
Expand Down
2 changes: 1 addition & 1 deletion cg_lims/EPPs/qc/sequencing_quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def sequencing_quality_control(ctx):
cg_api_client=status_db_api,
)

quality_summary: str = quality_checker.validate_sequencing_quality()
quality_summary: str = quality_checker.validate_sequencing_quality(lims=lims)
brief_summary: str = quality_checker.get_brief_summary()

if quality_checker.samples_failed_quality_control():
Expand Down
14 changes: 14 additions & 0 deletions cg_lims/get/samples.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from typing import List
from xml.etree.ElementTree import ParseError

from cg_lims.exceptions import MissingSampleError
from genologics.entities import Artifact, Process, Sample
Expand Down Expand Up @@ -37,3 +38,16 @@ def get_one_sample_from_artifact(artifact: Artifact) -> Sample:
raise MissingSampleError(message=more_than_one_message)

return samples[0]


def is_negative_control(sample: Sample) -> bool:
    """Check if a given sample is a negative control.

    A sample counts as a negative control when its "Control" UDF equals
    "negative". A missing UDF or any other value yields False.

    Raises:
        MissingSampleError: If reading the sample's UDFs raises a ParseError,
            which is reported as the sample not being found in the database.
    """
    # Keep the guarded region to the UDF read, the only call expected to fail here.
    try:
        control = sample.udf.get("Control")
    except ParseError as error:
        error_message = f"Sample {sample} can't be found in the database."
        LOG.error(error_message)
        # Keyword form matches the other MissingSampleError raise in this module;
        # chaining keeps the original parse failure attached for debugging.
        raise MissingSampleError(message=error_message) from error
    return control == "negative"
31 changes: 19 additions & 12 deletions tests/EPPs/qc/test_sequencing_quality_checker.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
from typing import List

from cg_lims.EPPs.qc.sequencing_quality_checker import SequencingQualityChecker
from genologics.lims import Lims
from mock import Mock


def test_quality_control_of_flow_cell_with_all_passing(
sequencing_quality_checker: SequencingQualityChecker,
novaseq_passing_metrics_response: Mock,
mocker,
lims: Lims,
):
# GIVEN a flow cell where all samples passes the quality control
# GIVEN a flow cell with one negative control where all samples pass the quality control
mocker.patch("requests.get", return_value=novaseq_passing_metrics_response)

# WHEN validating the sequencing quality
sequencing_quality_checker.validate_sequencing_quality()
sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN no samples should fail the quality control
assert sequencing_quality_checker.failed_qc_count == 0
Expand All @@ -25,15 +27,16 @@ def test_all_samples_fail_q30(
novaseq_sample_ids: List[str],
novaseq_lanes,
mocker,
lims: Lims,
):
# GIVEN a flow cell where all samples fail the quality control on Q30
# GIVEN a flow cell with one negative control where all samples fail the quality control on Q30
mocker.patch("requests.get", return_value=novaseq_q30_fail_response)

# WHEN validating the sequencing quality
sequencing_quality_checker.validate_sequencing_quality()
sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN all samples in all lanes should fail the quality control
expected_fails: int = novaseq_lanes * len(novaseq_sample_ids)
expected_fails: int = novaseq_lanes * (len(novaseq_sample_ids) - 1)
assert sequencing_quality_checker.failed_qc_count == expected_fails


Expand All @@ -43,28 +46,30 @@ def test_all_samples_have_too_few_reads(
novaseq_sample_ids: List[str],
novaseq_lanes: int,
mocker,
lims: Lims,
):
# GIVEN a flow cell where all samples in all lanes have too few reads
# GIVEN a flow cell with one negative control where all samples in all lanes have too few reads
mocker.patch("requests.get", return_value=novaseq_reads_fail_response)

# WHEN validating the sequencing quality
sequencing_quality_checker.validate_sequencing_quality()
sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN all samples in all lanes should fail the quality control
expected_fails: int = novaseq_lanes * len(novaseq_sample_ids)
expected_fails: int = novaseq_lanes * (len(novaseq_sample_ids) - 1)
assert sequencing_quality_checker.failed_qc_count == expected_fails


def test_some_samples_fail_quality_control(
sequencing_quality_checker: SequencingQualityChecker,
novaseq_two_failing_metrics_response: Mock,
mocker,
lims: Lims,
):
# GIVEN a flow cell where some samples fail the quality control
# GIVEN a flow cell with one negative control where some samples (not the NTC) fail the quality control
mocker.patch("requests.get", return_value=novaseq_two_failing_metrics_response)

# WHEN validating the sequencing quality
sequencing_quality_checker.validate_sequencing_quality()
sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN some samples in all lanes should fail the quality control
assert sequencing_quality_checker.failed_qc_count == 2
Expand All @@ -76,12 +81,13 @@ def test_metrics_missing_for_samples_in_lane(
missing_sample_id: str,
missing_lane: int,
mocker,
lims: Lims,
):
# GIVEN metrics missing data for a sample in lims
mocker.patch("requests.get", return_value=novaseq_missing_metrics_for_sample_in_lane_response)

# WHEN validating the sequencing quality
summary: str = sequencing_quality_checker.validate_sequencing_quality()
summary: str = sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN the sample with missing metrics should fail qc
assert sequencing_quality_checker.failed_qc_count == 1
Expand All @@ -96,12 +102,13 @@ def test_sample_missing_in_lims(
novaseq_metrics_with_extra_sample_response: Mock,
sample_id_missing_in_lims: str,
mocker,
lims: Lims,
):
# GIVEN metrics with a sample not in lims
mocker.patch("requests.get", return_value=novaseq_metrics_with_extra_sample_response)

# WHEN validating the sequencing quality
summary: str = sequencing_quality_checker.validate_sequencing_quality()
summary: str = sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN all samples pass the quality control
assert sequencing_quality_checker.failed_qc_count == 0
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/novaseq_standard/samples/ACC9628A2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@
<udf:field type="Numeric" name="Reads missing (M)">0</udf:field>
<udf:field type="String" name="Index type">NEXTflex® v2 UDI Barcodes 1 - 96</udf:field>
<udf:field type="String" name="Index number">13</udf:field>
<udf:field type="String" name="Control">negative</udf:field>
</smp:sample>
Loading