forked from andor-halab1/FISHerMan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
blastAbundantRNA.m
94 lines (78 loc) · 3.08 KB
/
blastAbundantRNA.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
% Blast probes against abundant RNA database. Only blast against the
% complementary sequences of abundant RNA. Rule out the possibility of
% homology of 15 nt or more.
function [Header,Sequence,nonSequence,nonSequence2] = blastAbundantRNA (adapterList,Header,Sequence,nonSequence,nonSequence2,params)
% params = struct('species','Mouse','verbose',1,...
% 'number',48,'seqNum',1000,'thres',30,'querySize',30,...
% 'blastArgs','-S 2','parallel', 0,...
% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas');
if isempty(nonSequence)
nonSequence = Sequence;
end
if isempty(nonSequence2)
nonSequence2 = Sequence; % meant seq2? TK
end
if params(1).verbose
disp('Removing non-specific probes against the abundant RNA database');
% But here nothing happens? TK
% Split one giant fasta file into smaller ones, so that parallel computing is possible
disp(' spliting fasta files for parallel computing');
end
filePathList = blastFileSplit(Header, Sequence, params(1).seqNum);
fileNum = length(filePathList);
% Blast mouse oligos against abundant RNA
DbPath = [params(1).species '.abundantrnaDb.fas'];
params(1).DbSize = getDbSize(DbPath);
eValue = bitScore2eValue(params(1).thres, params(1).querySize, params(1).DbSize);
blastArgs = [params(1).blastArgs ' -e ' num2str(eValue)];
blastData = {};
if params(1).parallel
poolobj = parpool;
verbose = params(1).verbose;
parfor k = 1:fileNum
if verbose
disp([' blasting temporary file no. ' num2str(k)]);
end
blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs);
end
delete(poolobj);
else
for k = 1:fileNum
if params(1).verbose
disp([' blasting temporary file no. ' num2str(k)]);
startTime = tic;
end
blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs);
if params(1).verbose
totalTime = toc(startTime);
disp([' elapsed time is ' num2str(totalTime) ' seconds']);
end
end
end
data = [];
for k = 1:length(blastData)
data = [data blastData{k,1}];
end
% Find out the oligos that non-specifically hit abundant rna database
seqDelete = [];
for n = 1:length(data)
% data(n).Query is one-segment Header with '='
% data(n).Hits(m).Name is one-segment Header
% different transcripts from the same gene will cause flag == 1
for m = 1:length(data(n).Hits)
if isempty(strfind(data(n).Query,data(n).Hits(m).Name))
seqDelete = [seqDelete;n];
break
end
end
end
Header(seqDelete)= [];
Sequence(seqDelete)= [];
nonSequence(seqDelete)= [];
nonSequence2(seqDelete)= [];
% Optional check how many transcripts are left after this step of screening
if params(1).verbose && length(adapterList) ~= 0 % check the second condition TK
[geneNumLeft,geneNumDelete] = checkTranscriptsLeft(adapterList,Header);
disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete), ' FISH escaped FISHerMan''s net']);
end
fileCleanUp(filePathList);