Skip to content

Commit

Permalink
patch version: fix missing imports
Browse files Browse the repository at this point in the history
  • Loading branch information
nebfield committed Oct 8, 2024
1 parent e50d131 commit e246ff8
Show file tree
Hide file tree
Showing 15 changed files with 301 additions and 43 deletions.
2 changes: 1 addition & 1 deletion pgscatalog.match/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pgscatalog.match"
version = "0.3.2"
version = "0.3.3"
description = "Tools for matching variants in PGS scoring files and target variant information files"
authors = ["Benjamin Wingfield <[email protected]>", "Samuel Lambert <[email protected]>", "Laurent Gil <[email protected]>"]
readme = "README.md"
Expand Down
6 changes: 3 additions & 3 deletions pgscatalog.match/src/pgscatalog/match/cli/_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import gzip
import itertools

from pgscatalog.core import MatchRateError
from pgscatalog.core.lib.pgsexceptions import MatchRateError

from ..lib.plinkscorefiles import PlinkScoreFiles
from pgscatalog.match.lib.plinkscorefiles import PlinkScoreFiles

from ._config import Config
from pgscatalog.match.cli._config import Config


def write_matches(matchresults, score_df):
Expand Down
6 changes: 3 additions & 3 deletions pgscatalog.match/src/pgscatalog/match/cli/match_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@

import polars as pl

from .. import (
from pgscatalog.match import (
VariantFrame,
ScoringFileFrame,
match_variants,
MatchResult,
MatchResults,
)

from ._config import Config
from ._write import write_matches
from pgscatalog.match.cli._config import Config
from pgscatalog.match.cli._write import write_matches

logger = logging.getLogger(__name__)

Expand Down
6 changes: 3 additions & 3 deletions pgscatalog.match/src/pgscatalog/match/cli/merge_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import pathlib
import shutil

from ._config import Config
from ._write import write_matches
from pgscatalog.match.cli._config import Config
from pgscatalog.match.cli._write import write_matches

from ..lib import ScoringFileFrame, MatchResult, MatchResults
from pgscatalog.match.lib import ScoringFileFrame, MatchResult, MatchResults

import polars as pl

Expand Down
8 changes: 4 additions & 4 deletions pgscatalog.match/src/pgscatalog/match/lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import logging

from .variantframe import VariantFrame
from .scoringfileframe import ScoringFileFrame, match_variants
from .matchresult import MatchResult, MatchResults
from .plinkscorefiles import PlinkScoreFiles
from pgscatalog.match.lib.variantframe import VariantFrame
from pgscatalog.match.lib.scoringfileframe import ScoringFileFrame, match_variants
from pgscatalog.match.lib.matchresult import MatchResult, MatchResults
from pgscatalog.match.lib.plinkscorefiles import PlinkScoreFiles

log_fmt = "%(name)s: %(asctime)s %(levelname)-8s %(message)s"
logging.basicConfig(format=log_fmt, datefmt="%Y-%m-%d %H:%M:%S")
Expand Down
24 changes: 18 additions & 6 deletions pgscatalog.match/src/pgscatalog/match/lib/_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
"""
import pathlib
import tempfile
from typing import Optional

import polars as pl
from functools import singledispatch

import polars.exceptions
from pgscatalog.core import NormalisedScoringFile, TargetVariants, TargetType
from polars.io.csv import BatchedCsvReader

from pgscatalog.match.lib.normalisedscoringfile import NormalisedScoringFile
from pgscatalog.match.lib.targetvariants import TargetVariants, TargetType


@singledispatch
Expand All @@ -25,7 +28,7 @@ def loose(path, tmpdir=None):


@loose.register
def _(path: NormalisedScoringFile, tmpdir=None):
def _(path: NormalisedScoringFile, tmpdir=None): # type: ignore
"""Write NormalisedScoringFiles to a list of arrow IPC files"""
if tmpdir is None:
tmpdir = tempfile.mkdtemp()
Expand All @@ -47,7 +50,7 @@ def _(path: NormalisedScoringFile, tmpdir=None):


@loose.register
def _(path: TargetVariants, tmpdir=None):
def _(path: TargetVariants, tmpdir=None): # type: ignore
"""Writes TargetVariants to a list of arrow IPC files"""
if tmpdir is None:
tmpdir = tempfile.mkdtemp()
Expand All @@ -63,7 +66,14 @@ def _(path: TargetVariants, tmpdir=None):
"column_5": pl.String,
"column_6": pl.String,
}
new_colnames = ["#CHROM", "ID", "CM", "POS", "REF", "ALT"]
new_colnames: Optional[list[str]] = [
"#CHROM",
"ID",
"CM",
"POS",
"REF",
"ALT",
]
header = False
comment = None
case TargetType.PVAR:
Expand Down Expand Up @@ -94,7 +104,9 @@ def _(path: TargetVariants, tmpdir=None):
return batch_read(reader, tmpdir=tmpdir, cols_keep=cols_keep)


def batch_read(reader, tmpdir, cols_keep) -> list[pathlib.Path]:
def batch_read(
reader: BatchedCsvReader, tmpdir: pathlib.Path, cols_keep: list[str]
) -> list[pathlib.Path]:
"""Read a CSV in batches and write them to temporary files"""
arrowpaths = []
# batch_size should be >= thread pool size, so tasks will be distributed amongst workers
Expand Down
16 changes: 9 additions & 7 deletions pgscatalog.match/src/pgscatalog/match/lib/_match/label.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,18 @@
import polars as pl
from xopen import xopen

from .preprocess import complement_valid_alleles
from pgscatalog.match.lib._match.preprocess import complement_valid_alleles

logger = logging.getLogger(__name__)


def label_matches(
df: pl.LazyFrame,
keep_first_match,
remove_ambiguous,
remove_multiallelic,
skip_flip,
filter_IDs,
keep_first_match: bool,
remove_ambiguous: bool,
remove_multiallelic: bool,
skip_flip: bool,
filter_IDs: pathlib.Path,
) -> pl.LazyFrame:
"""Label match candidates with additional metadata. Column definitions:
Expand Down Expand Up @@ -220,7 +220,9 @@ def _label_duplicate_id(df: pl.LazyFrame, keep_first_match: bool) -> pl.LazyFram
)


def _label_biallelic_ambiguous(df: pl.LazyFrame, remove_ambiguous) -> pl.LazyFrame:
def _label_biallelic_ambiguous(
df: pl.LazyFrame, remove_ambiguous: bool
) -> pl.LazyFrame:
"""
Identify ambiguous variants (A/T & C/G SNPs)
Expand Down
2 changes: 1 addition & 1 deletion pgscatalog.match/src/pgscatalog/match/lib/_plinkframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import gzip
import pathlib

from ._match.plink import plinkify, pivot_score
from pgscatalog.match.lib._match.plink import plinkify, pivot_score


class PlinkFrame:
Expand Down
10 changes: 5 additions & 5 deletions pgscatalog.match/src/pgscatalog/match/lib/matchresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

import polars as pl

from pgscatalog.core import ZeroMatchesError, MatchRateError
from pgscatalog.core.lib.pgsexceptions import ZeroMatchesError, MatchRateError

from ._plinkframe import PlinkFrames
from ._match.label import label_matches
from ._match.filter import filter_scores
from ._match.log import make_logs, check_log_count, make_summary_log
from pgscatalog.match.lib._plinkframe import PlinkFrames
from pgscatalog.match.lib._match.label import label_matches
from pgscatalog.match.lib._match.filter import filter_scores
from pgscatalog.match.lib._match.log import make_logs, check_log_count, make_summary_log

logger = logging.getLogger(__name__)

Expand Down
57 changes: 57 additions & 0 deletions pgscatalog.match/src/pgscatalog/match/lib/normalisedscoringfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import csv

from xopen import xopen

from pgscatalog.core.lib.models import ScoreVariant


def _read_normalised_rows(path):
    """Lazily yield one ScoreVariant per data row of a tab-delimited file.

    The file at ``path`` is opened with xopen and parsed by csv.DictReader,
    so the first row supplies the field names. Each row dict is unpacked
    into ScoreVariant as keyword arguments. The file handle stays open only
    while the generator is being consumed.
    """
    with xopen(path) as handle:
        for record in csv.DictReader(handle, delimiter="\t"):
            yield ScoreVariant(**record)


class NormalisedScoringFile:
    """A scoring file that has been normalised to a consistent format.

    Wraps either a path to a combined file or an object exposing a
    ``normalise()`` method, and mainly exists to provide a convenient way
    to iterate over variants.

    # TODO: replace with a pydantic model in pgscatalog.core
    """

    def __init__(self, path):
        # either a ScoringFile or a path to a combined file
        self._scoringfile = path

        # Probe only: the handle is opened and closed straight away. A
        # TypeError from xopen is taken to mean ``path`` is not a filename.
        try:
            with xopen(path):
                pass
        except TypeError:
            openable = False
        else:
            openable = True

        self.is_path = openable
        self.path = path if openable else str(path)

    def __iter__(self):
        yield from self.variants

    @property
    def variants(self):
        """Return a fresh iterator of variants on every access."""
        source = self._scoringfile
        # paths are re-read from disk; anything else is asked to normalise()
        self._variants = (
            _read_normalised_rows(source) if self.is_path else source.normalise()
        )
        return self._variants

    def __repr__(self):
        # paths are shown as their string form, other objects by their repr
        target = str(self._scoringfile) if self.is_path else self._scoringfile
        return f"{type(self).__name__}({target!r})"
10 changes: 5 additions & 5 deletions pgscatalog.match/src/pgscatalog/match/lib/scoringfileframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@

import polars as pl

from pgscatalog.core import NormalisedScoringFile
from pgscatalog.match.lib.normalisedscoringfile import NormalisedScoringFile

from ._arrow import loose
from ._match.preprocess import complement_valid_alleles
from ._match.match import get_all_matches
from .matchresult import MatchResult
from pgscatalog.match.lib._arrow import loose
from pgscatalog.match.lib._match.preprocess import complement_valid_alleles
from pgscatalog.match.lib._match.match import get_all_matches
from pgscatalog.match.lib.matchresult import MatchResult

logger = logging.getLogger(__name__)

Expand Down
Loading

0 comments on commit e246ff8

Please sign in to comment.