Skip to content

Commit

Permalink
Merge branch 'release/0.3.4'
Browse files Browse the repository at this point in the history
  • Loading branch information
abought committed Jun 6, 2022
2 parents c7903b1 + d726fb2 commit d0a94e2
Show file tree
Hide file tree
Showing 10 changed files with 57 additions and 24 deletions.
4 changes: 3 additions & 1 deletion assets/js/pages/gwas_upload.js
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,16 @@ modal.$on('has_options', function (parser_options) { // Close with options selec
let is_valid;
try {
const sample_data = rows.slice(first_data_index).map(row => parser(row));
// Note: in future, may want to reflect the "allowed chromosomes whitelist" in frontend validator
// (for now, there's a slight gap between what the web UI and the server check when giving feedback)
is_valid = validateSortOrder(sample_data);
} catch (e) {
// TODO: Improve UI representation of other parsing errors, like AF out of range
console.error('error encountered', e);
is_valid = false;
}
if (!is_valid) { // slightly vague message; in rare cases some middle data rows may be unparseable
fileField.setCustomValidity('Your file must be sorted by chromosome and position prior to upload.');
fileField.setCustomValidity('Validation error. Check that all column options are specified correctly. Your file must be sorted by chromosome and position prior to upload.');
}
});
});
Expand Down
6 changes: 4 additions & 2 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,10 @@
'TEST_REQUEST_DEFAULT_FORMAT': 'vnd.api+json'
}

# The maximum region size (bp) for a single Locuszoom plot. This is used to prevent ginormous API calls.
LZ_MAX_REGION_SIZE = 1_000_000
# The maximum region size (bp) for a single Locuszoom plot. This is used to prevent ginormous API calls. Value is set
# for JS in LocalZoom, separately.
LZ_MAX_REGION_SIZE = 2_000_000

# The "official" domain name. This is set in a .env file, and it must exactly match the base url registered as part of
# your OAuth provider configuration (eg callback urls). It should be a domain, not an IP.
LZ_OFFICIAL_DOMAIN = env('LZ_OFFICIAL_DOMAIN', default='my.locuszoom.org')
Expand Down
3 changes: 2 additions & 1 deletion locuszoom_plotting_service/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def _query_params(self) -> ty.Tuple[str, int, int]:
raise drf_exceptions.ParseError('"end" position must be greater than "start"')

if not (0 <= (end - start) <= settings.LZ_MAX_REGION_SIZE):
raise drf_exceptions.ParseError(f'Cannot handle requested region size. Max allowed is {500_000}')
raise drf_exceptions.ParseError(
f'Cannot handle requested region size. Max allowed is {settings.LZ_MAX_REGION_SIZE}')

return chrom, start, end
2 changes: 1 addition & 1 deletion locuszoom_plotting_service/templates/pages/about.html
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ <h3 class="faq" id="prepare-data">How should I prepare my data for uploading?</h
<li>By marker (chrom_pos:ref/alt): <i>9:22125503_G/C</i>, OR</li>
<li>By individual columns (preferred)
<ul>
<li>Chromosome</li>
<li>Chromosome (1-25, X, Y, M, or MT; please contact us if you need other chromosome names)</li>
<li>Position</li>
<li>Ref. allele (according to human reference genome) (optional for plots, but required for LD)</li>
<li>Alt. allele (according to human reference genome) (optional for plots, but required for LD)</li>
Expand Down
20 changes: 10 additions & 10 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"d3": "^5.16.0",
"d3-tip": "0.9.1",
"gwas-credible-sets": "^0.1.0",
"localzoom": "git+https://github.com/statgen/localzoom.git#b78103b",
"localzoom": "git+https://github.com/statgen/localzoom.git#84215bf",
"locuszoom": "0.14.0",
"lodash": "^4.17.21",
"pako": "^1.0.11",
Expand Down
2 changes: 1 addition & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ pyyaml # Required for OpenAPI rendering
boltons~=20.2.1
scipy~=1.5.3
python-magic==0.4.18
zorp[perf,lookups]==0.3.4
zorp[perf,lookups]==0.3.6
genelocator==1.1.2
12 changes: 8 additions & 4 deletions tests/ingest/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,19 @@ def test_positions_not_sorted(self):
with pytest.raises(val_exc.ValidationException):
validators.standard_gwas_validator._validate_contents(reader)

def test_rejects_rsids(self):
def test_whitelists_chroms(self):
reader = sniffers.guess_gwas_generic([
"#chrom\tpos\tref\talt\tneg_log_pvalue",
"X\t1\tA\tC\t7.3",
"rs1100\t2\tA\tC\t7.3",
"rs2521\t1\tA\tC\t7.3",
"chr1\t2\tA\tC\t7.3",
"invalid\t1\tA\tC\t7.3",
])

with pytest.raises(val_exc.ValidationException, match='is an rsID'):
with pytest.raises(
val_exc.ValidationException,
match="Chromosome INVALID is not a valid chromosome name. Must be "
"one of: '1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 M MT X Y'"
):
validators.standard_gwas_validator._validate_contents(reader)

def test_validates_for_file(self):
Expand Down
9 changes: 9 additions & 0 deletions util/ingest/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import functools
import logging
import re
import typing as ty

from . import exceptions
from zorp import exceptions as z_exc
Expand All @@ -29,3 +31,10 @@ def wrapper(*args, **kwargs):
logger.exception('Task failed due to unexpected error')
raise exceptions.UnexpectedIngestException
return wrapper


def natural_sort(items: ty.Iterable) -> ty.List[str]:
    """Return the given strings as a new list in natural (human-friendly) order.

    Runs of ASCII digits are compared numerically and all other text is compared
    case-insensitively, so that '2' sorts before '10'. Used for human-friendly
    error messages, eg, from a `set` of allowed strings.

    :param items: Any iterable of strings (eg a list, set, or frozenset)
    :return: A new sorted list; the input is not modified
    """
    def alphanum_key(key: str) -> list:
        # Splitting on a capturing group always yields alternating pieces:
        #   text, digits, text, digits, ..., text
        # so every odd index is guaranteed to be a run of ASCII digits. Deciding by position (rather than
        #   `str.isdigit()`) keeps characters such as superscript or non-ASCII digits in the text slots; those
        #   would otherwise crash `int()` or produce keys that mix int and str in the same position.
        pieces = re.split(r'([0-9]+)', key)
        return [int(piece) if index % 2 else piece.lower() for index, piece in enumerate(pieces)]

    return sorted(items, key=alphanum_key)
21 changes: 18 additions & 3 deletions util/ingest/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@

logger = logging.getLogger(__name__)

# Whitelist of allowed chromosomes. It's ok to add more values, as long as we have some kind of whitelist.
# The generic parser uses these as a safeguard, because when people slip a non-categorical value into the chrom field,
# tabix uses all the RAM on the system and then crashes horribly. Looser heuristics (such as only rejecting rsIDs) are not enough to prevent this.
# Numeric names '1' through '25', plus the named chromosomes X, Y, M and MT.
ALLOWED_CHROMS = frozenset(
    [str(number) for number in range(1, 26)] + ['X', 'Y', 'M', 'MT']
)


class _GwasValidator:
"""Validate a raw GWAS file as initially uploaded (given filename and instructions on how to parse it)"""
Expand Down Expand Up @@ -52,12 +61,18 @@ def _validate_data_rows(self, reader) -> bool:
for cp, tied_variants in cp_groups:
cur_chrom = cp[0]

if cur_chrom.startswith('RS'): # Parser always capitalizes chrom names
raise v_exc.ValidationException(f'Invalid chromosome specified: value "{cur_chrom}" is an rsID')
# Prevent server issues by imposing strict limits on what chroms are allowed
if cur_chrom not in ALLOWED_CHROMS:
options = ' '.join(helpers.natural_sort(ALLOWED_CHROMS))
raise v_exc.ValidationException(
f"Chromosome {cur_chrom} is not a valid chromosome name. Must be one of: '{options}'")

if cur_chrom == prev_chrom and cp[1] < prev_pos:
# Positions not in correct order for Pheweb to use
raise v_exc.ValidationException(f'Positions must be sorted prior to uploading. Position chr{cur_chrom}:{cp[1]} should not follow chr{prev_chrom}:{prev_pos}')
raise v_exc.ValidationException(
f'Positions must be sorted prior to uploading. '
f'Position chr{cur_chrom}:{cp[1]} should not follow chr{prev_chrom}:{prev_pos}'
)

if cur_chrom != prev_chrom:
if cur_chrom in chrom_seen:
Expand Down

0 comments on commit d0a94e2

Please sign in to comment.