Skip to content

Commit

Permalink
Move Shine-Dalgarno motif computation to _sequence.h header
Browse files Browse the repository at this point in the history
  • Loading branch information
althonos committed Sep 15, 2023
1 parent d69ad2e commit 01e3d43
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 207 deletions.
152 changes: 152 additions & 0 deletions pyrodigal/_sequence.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,156 @@ static int _mer_ndx(const uint8_t* digits, const int slen, const int i, const in
}
return ndx;
}

static int _shine_dalgarno_exact(const uint8_t* digits, const int slen, int pos, int start, const double rwt[28], const int strand) {
int i;
int j;
int k;
int mism;
int rdis;
int limit;
int max_val;
int cur_ctr;
int match[6];
int dis_flag;
int cur_val = 0;

limit = imin(6, start-4-pos);
for(i = 0; i < 6; i++) match[i] = -10;

/* Compare the 6-base region to AGGAGG */
for(i = 0; i < limit; i++) {
if(i%3 == 0 && _is_a(digits, slen, pos+i, strand))
match[i] = 2;
else if(i%3 != 0 && _is_g(digits, slen, pos+i, strand))
match[i] = 3;
}

/* Find the maximally scoring motif */
max_val = 0;
for(i = limit; i >= 3; i--) {
for(j = 0; j <= limit-i; j++) {
cur_ctr = -2;
mism = 0;
for(k = j; k < j+i; k++) {
cur_ctr += match[k];
if(match[k] < 0) mism++;
}
if(mism > 0) continue;
rdis = start - (pos+j+i);
if(rdis < 5 && i < 5) dis_flag = 2;
else if(rdis < 5 && i >= 5) dis_flag = 1;
else if(rdis > 10 && rdis <= 12 && i < 5) dis_flag = 1;
else if(rdis > 10 && rdis <= 12 && i >= 5) dis_flag = 2;
else if(rdis >= 13) { dis_flag = 3; }
else dis_flag = 0;
if(rdis > 15 || cur_ctr < 6) continue;

/* Exact-Matching RBS Motifs */
if(cur_ctr < 6) cur_val = 0;
else if(cur_ctr == 6 && dis_flag == 2) cur_val = 1;
else if(cur_ctr == 6 && dis_flag == 3) cur_val = 2;
else if(cur_ctr == 8 && dis_flag == 3) cur_val = 3;
else if(cur_ctr == 9 && dis_flag == 3) cur_val = 3;
else if(cur_ctr == 6 && dis_flag == 1) cur_val = 6;
else if(cur_ctr == 11 && dis_flag == 3) cur_val = 10;
else if(cur_ctr == 12 && dis_flag == 3) cur_val = 10;
else if(cur_ctr == 14 && dis_flag == 3) cur_val = 10;
else if(cur_ctr == 8 && dis_flag == 2) cur_val = 11;
else if(cur_ctr == 9 && dis_flag == 2) cur_val = 11;
else if(cur_ctr == 8 && dis_flag == 1) cur_val = 12;
else if(cur_ctr == 9 && dis_flag == 1) cur_val = 12;
else if(cur_ctr == 6 && dis_flag == 0) cur_val = 13;
else if(cur_ctr == 8 && dis_flag == 0) cur_val = 15;
else if(cur_ctr == 9 && dis_flag == 0) cur_val = 16;
else if(cur_ctr == 11 && dis_flag == 2) cur_val = 20;
else if(cur_ctr == 11 && dis_flag == 1) cur_val = 21;
else if(cur_ctr == 11 && dis_flag == 0) cur_val = 22;
else if(cur_ctr == 12 && dis_flag == 2) cur_val = 20;
else if(cur_ctr == 12 && dis_flag == 1) cur_val = 23;
else if(cur_ctr == 12 && dis_flag == 0) cur_val = 24;
else if(cur_ctr == 14 && dis_flag == 2) cur_val = 25;
else if(cur_ctr == 14 && dis_flag == 1) cur_val = 26;
else if(cur_ctr == 14 && dis_flag == 0) cur_val = 27;

if(rwt[cur_val] < rwt[max_val]) continue;
if(rwt[cur_val] == rwt[max_val] && cur_val < max_val) continue;
max_val = cur_val;
}
}

return max_val;
}

static int _shine_dalgarno_mm(const uint8_t* digits, const int slen, int pos, int start, const double rwt[28], const int strand) {
int i;
int j;
int k;
int mism;
int rdis;
int limit;
int max_val;
int cur_val = 0;
int match[6];
int cur_ctr;
int dis_flag;

limit = imin(6, start-4-pos);
for(i = 0; i < 6; i++) match[i] = -10;

/* Compare the 6-base region to AGGAGG */
for(i = 0; i < limit; i++) {
if(i % 3 == 0) {
if(_is_a(digits, slen, pos+i, strand) == 1) match[i] = 2;
else match[i] = -3;
}
else {
if(_is_g(digits, slen, pos+i, strand) == 1) match[i] = 3;
else match[i] = -2;
}
}

/* Find the maximally scoring motif */
max_val = 0;
for(i = limit; i >= 5; i--) {
for(j = 0; j <= limit-i; j++) {
cur_ctr = -2;
mism = 0;
for(k = j; k < j+i; k++) {
cur_ctr += match[k];
if(match[k] < 0) mism++;
if(match[k] < 0 && (k <= j+1 || k >= j+i-2)) cur_ctr -= 10;
}
if(mism != 1) continue;
rdis = start - (pos+j+i);
if(rdis < 5) { dis_flag = 1; }
else if(rdis > 10 && rdis <= 12) { dis_flag = 2; }
else if(rdis >= 13) { dis_flag = 3; }
else dis_flag = 0;
if(rdis > 15 || cur_ctr < 6) continue;

/* Single-Mismatch RBS Motifs */
if(cur_ctr < 6) cur_val = 0;
else if(cur_ctr == 6 && dis_flag == 3) cur_val = 2;
else if(cur_ctr == 7 && dis_flag == 3) cur_val = 2;
else if(cur_ctr == 9 && dis_flag == 3) cur_val = 3;
else if(cur_ctr == 6 && dis_flag == 2) cur_val = 4;
else if(cur_ctr == 6 && dis_flag == 1) cur_val = 5;
else if(cur_ctr == 6 && dis_flag == 0) cur_val = 9;
else if(cur_ctr == 7 && dis_flag == 2) cur_val = 7;
else if(cur_ctr == 7 && dis_flag == 1) cur_val = 8;
else if(cur_ctr == 7 && dis_flag == 0) cur_val = 14;
else if(cur_ctr == 9 && dis_flag == 2) cur_val = 17;
else if(cur_ctr == 9 && dis_flag == 1) cur_val = 18;
else if(cur_ctr == 9 && dis_flag == 0) cur_val = 19;

if(rwt[cur_val] < rwt[max_val]) continue;
if(rwt[cur_val] == rwt[max_val] && cur_val < max_val) continue;
max_val = cur_val;
}
}

return max_val;
}

#endif
2 changes: 2 additions & 0 deletions pyrodigal/_sequence.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ cdef extern from "_sequence.h" nogil:
cdef bint _is_ttg(const uint8_t* digits, const int slen, const int i, const int strand) noexcept

cdef int _mer_ndx(const uint8_t* digits, const int slen, const int i, const int length, const int strand) noexcept
cdef int _shine_dalgarno_exact(const uint8_t* digits, const int slen, int pos, int start, double *rwt, const int strand)
cdef int _shine_dalgarno_mm(const uint8_t* digits, const int slen, int pos, int start, double *rwt, const int strand)
14 changes: 0 additions & 14 deletions pyrodigal/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,6 @@ cdef class Sequence:
bint is_init=*,
char unknown_residue=*
) noexcept nogil
cdef int _shine_dalgarno_exact(
self,
const int pos,
const int start,
const double rbs_wt[28],
const int strand
) noexcept nogil
cdef int _shine_dalgarno_mm(
self,
const int pos,
const int start,
const double rbs_wt[28],
const int strand
) noexcept nogil

cpdef size_t __sizeof__(self)

Expand Down
Loading

0 comments on commit 01e3d43

Please sign in to comment.