Skip to content

Commit

Permalink
New script: Saphyr average concentration and CV calculation (#479) (m…
Browse files Browse the repository at this point in the history
…inor)

### Added

- New script to calculate average concentration and CV value for Saphyr samples
  • Loading branch information
idalindegaard authored Mar 26, 2024
1 parent 41b63d8 commit 649cab5
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cg_lims/EPPs/udf/calculate/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from cg_lims.EPPs.udf.calculate.calculate_resuspension_buffer_volumes import (
calculate_resuspension_buffer_volume,
)
from cg_lims.EPPs.udf.calculate.calculate_saphyr_concentration import calculate_saphyr_concentration
from cg_lims.EPPs.udf.calculate.calculate_water import volume_water
from cg_lims.EPPs.udf.calculate.calculate_water_volume_rna import calculate_water_volume_rna
from cg_lims.EPPs.udf.calculate.get_missing_reads import get_missing_reads
Expand Down Expand Up @@ -60,5 +61,6 @@ def calculate(ctx):
calculate.add_command(novaseq_x_volumes)
calculate.add_command(pool_normalization)
calculate.add_command(novaseq_x_denaturation)
calculate.add_command(calculate_saphyr_concentration)
calculate.add_command(ont_aliquot_volume)
calculate.add_command(ont_available_sequencing_reload)
92 changes: 92 additions & 0 deletions cg_lims/EPPs/udf/calculate/calculate_saphyr_concentration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import logging
import sys
from typing import List

import click
import numpy as np
from cg_lims.exceptions import LimsError, MissingUDFsError
from cg_lims.get.artifacts import get_artifacts
from genologics.entities import Artifact

# Module-level logger, named after this module via __name__.
LOG = logging.getLogger(__name__)


def get_concentrations(artifact: Artifact) -> List[float]:
    """Collect the three replicate concentration UDFs of an artifact.

    The UDFs Concentration 1 (ng/ul), Concentration 2 (ng/ul) and
    Concentration 3 (ng/ul) are looked up in that order with artifact.udf.get,
    so an unset UDF contributes whatever that lookup returns for an absent key
    (presumably None) instead of raising."""
    replicate_udfs = (
        "Concentration 1 (ng/ul)",
        "Concentration 2 (ng/ul)",
        "Concentration 3 (ng/ul)",
    )
    return [artifact.udf.get(udf_name) for udf_name in replicate_udfs]


def calculate_average_concentration(concentrations: List[float]) -> float:
    """Return the arithmetic mean of the given concentrations as a built-in float."""
    mean_value = np.mean(concentrations)
    return float(mean_value)


def calculate_cv(concentrations: List[float]) -> float:
    """Return the coefficient of variation (CV) of the concentrations.

    The CV is the standard deviation divided by the mean. Both are computed
    here from the given list; np.std is called with its defaults, i.e. the
    population standard deviation (ddof=0).

    The result is converted to a built-in float so that it matches the
    declared return type and the behaviour of calculate_average_concentration,
    rather than leaking a numpy scalar to callers.

    NOTE: the division is done on numpy scalars before the conversion, so a
    mean of zero yields nan/inf (with a numpy RuntimeWarning) instead of
    raising ZeroDivisionError."""
    average_concentration = np.mean(concentrations)
    std_deviation = np.std(concentrations)
    coefficient_variation = std_deviation / average_concentration
    return float(coefficient_variation)


def set_average_and_cv(artifact: Artifact) -> None:
    """Compute the average concentration and CV of an artifact and store them.

    The replicate concentrations are fetched with get_concentrations, the
    results of calculate_average_concentration and calculate_cv are written to
    the UDFs Average concentration (ng/ul) and Coefficient of variation (CV),
    and the artifact is then saved with artifact.put()."""
    replicates = get_concentrations(artifact=artifact)

    # Both values are computed before any UDF is touched, so a failing
    # calculation leaves the artifact unmodified.
    computed_udfs = {
        "Average concentration (ng/ul)": calculate_average_concentration(
            concentrations=replicates
        ),
        "Coefficient of variation (CV)": calculate_cv(concentrations=replicates),
    }
    for udf_name, value in computed_udfs.items():
        artifact.udf[udf_name] = value
    artifact.put()


def validate_udf_values(artifact: Artifact) -> bool:
    """Check that all three replicate concentrations of an artifact are usable.

    A concentration is invalid when it is missing, zero or negative. Every
    invalid UDF is logged together with the ID of the artifact's first sample.
    Returns True only when none of the three UDFs is invalid."""
    replicate_udfs = (
        "Concentration 1 (ng/ul)",
        "Concentration 2 (ng/ul)",
        "Concentration 3 (ng/ul)",
    )
    is_valid = True
    for name in replicate_udfs:
        value = artifact.udf.get(name)
        # Truthy (set and non-zero) and not negative: nothing to report.
        if value and not value < 0:
            continue
        is_valid = False
        LOG.info(
            f"Sample {artifact.samples[0].id} has an invalid concentration value for {name}. Skipping."
        )
    return is_valid


@click.command()
@click.pass_context
def calculate_saphyr_concentration(ctx) -> None:
    """Calculates and sets the average concentration and coefficient of variance based on three given concentrations.
    Returns a message if this worked well, and if there were negative/no/zero concentration values, there's an error message for this
    """
    # NOTE: the docstring above is left untouched on purpose; click uses a
    # command's docstring as its help text.

    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
    process = ctx.obj["process"]

    try:
        artifacts: List[Artifact] = get_artifacts(process=process, measurement=True)

        # Valid artifacts are updated right away; invalid ones are only counted
        # here (validate_udf_values logs the details) and reported once at the
        # end, so one bad sample does not block the others.
        failed_samples = 0
        for artifact in artifacts:
            if validate_udf_values(artifact=artifact):
                set_average_and_cv(artifact=artifact)
            else:
                failed_samples += 1

        if failed_samples:
            raise MissingUDFsError(
                f"{failed_samples} samples have invalid concentration values (<= 0). See log for more information."
            )

        success_message = "The average concentration and coefficient of variance have been calculated for all samples."
        LOG.info(success_message)
        click.echo(success_message)
    except LimsError as error:
        # Log the failure and exit with the message as the exit status.
        LOG.error(error.message)
        sys.exit(error.message)

0 comments on commit 649cab5

Please sign in to comment.