Skip to content

Commit

Permalink
Minor bug fixes
Browse files Browse the repository at this point in the history
	modified:   bedExt.pl
	modified:   extractPeak.pl
	modified:   joinWrapper.py
	modified:   tag2peak.pl
  • Loading branch information
zhangchaolin committed Oct 9, 2020
1 parent a71e580 commit 7edc906
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 60 deletions.
4 changes: 4 additions & 0 deletions bedExt.pl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@
{
open ($fin, "gunzip -c $in |") || Carp::croak "cannot open file $in to read\n";
}
elsif ($in =~/\.bz2$/)
{
open ($fin, "bunzip2 -c $in |") || Carp::croak "cannot open file $in to read\n";
}
else
{
open ($fin, "<$in") || Carp::croak "cannot open file $in to read\n";
Expand Down
4 changes: 4 additions & 0 deletions extractPeak.pl
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@
{
open ($fin, "gunzip -c $inWiggleFile | ") || Carp::croak "cannot open file $inWiggleFile to read\n";
}
elsif ($inWiggleFile =~/\.bz2$/)
{
open ($fin, "bunzip2 -c $inWiggleFile | ") || Carp::croak "cannot open file $inWiggleFile to read\n";
}
else
{
open ($fin, "<$inWiggleFile") || Carp::croak "cannot open file $inWiggleFile to read\n";
Expand Down
120 changes: 61 additions & 59 deletions joinWrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,72 +30,74 @@

#!/usr/bin/env python
#Guruprasad Ananda
#modifications to support python3 by sebastien weyn
"""
This tool provides the UNIX "join" functionality.
"""
import sys, os, tempfile, subprocess

def stop_err(msg):
sys.stderr.write(msg)
sys.exit()
sys.stderr.write(msg)
sys.exit()

def main():
infile1 = sys.argv[1]
infile2 = sys.argv[2]
field1 = int(sys.argv[3])
field2 = int(sys.argv[4])
mode =sys.argv[5]
outfile = sys.argv[6]

tmpfile1 = tempfile.NamedTemporaryFile()
tmpfile2 = tempfile.NamedTemporaryFile()

try:
#Sort the two files based on specified fields
os.system("sort -t ' ' -k %d,%d -o %s %s" %(field1, field1, tmpfile1.name, infile1))
os.system("sort -t ' ' -k %d,%d -o %s %s" %(field2, field2, tmpfile2.name, infile2))
except Exception, exc:
stop_err( 'Initialization error -> %s' %str(exc) )

option = ""
for line in file(tmpfile1.name):
line = line.strip()
if line:
elems = line.split('\t')
for j in range(1,len(elems)+1):
if j == 1:
option = "1.1"
else:
option = option + ",1." + str(j)
break

#check if join has --version option. BSD join doens't have this option, while GNU join does.
#The return value in the latter case will be 0, and non-zero in the latter case.
ret = subprocess.call('join --version 2>/dev/null', shell=True)
# check if we are a version later than 7 of join. If so, we want to skip
# checking the order since join will raise an error with duplicated items in
# the two files being joined.
if ret == 0:
cl = subprocess.Popen(["join", "--version"], stdout=subprocess.PIPE)
(stdout, _) = cl.communicate()
version_line = stdout.split("\n")[0]
(version, _) = version_line.split()[-1].split(".")
if int(version) >= 7:
flags = "--nocheck-order"
else:
flags = ""
else:
flags = ""
infile1 = sys.argv[1]
infile2 = sys.argv[2]
field1 = int(sys.argv[3])
field2 = int(sys.argv[4])
mode =sys.argv[5]
outfile = sys.argv[6]

tmpfile1 = tempfile.NamedTemporaryFile()
tmpfile2 = tempfile.NamedTemporaryFile()

try:
#Sort the two files based on specified fields
os.system("sort -t ' ' -k %d,%d -o %s %s" %(field1, field1, tmpfile1.name, infile1))
os.system("sort -t ' ' -k %d,%d -o %s %s" %(field2, field2, tmpfile2.name, infile2))
except:
stop_err( 'Initialization error -> %s' %str(exc) )

option = ""
with open(tmpfile1.name, 'r') as infile:
for line in infile:
line = line.strip()
if line:
elems = line.split('\t')
for j in range(1,len(elems)+1):
if j == 1:
option = "1.1"
else:
option = option + ",1." + str(j)
break

#check if join has --version option. BSD join doens't have this option, while GNU join does.
#The return value in the latter case will be 0, and non-zero in the latter case.
ret = subprocess.run('join --version 2>/dev/null', shell=True)
# check if we are a version later than 7 of join. If so, we want to skip
# checking the order since join will raise an error with duplicated items in
# the two files being joined.
if ret.returncode == 0:
cl = subprocess.Popen(["join", "--version"], stdout=subprocess.PIPE)
(stdout, _) = cl.communicate()
version_line = str(stdout).split("\\n")[0]
(version, _) = version_line.split()[-1].split(".")
if int(version) >= 7:
flags = "--nocheck-order"
else:
flags = ""
else:
flags = ""

if mode == "V":
cmdline = "join %s -t ' ' -v 1 -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile)
else:
cmdline = "join %s -t ' ' -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile)
try:
os.system(cmdline)
except Exception, exj:
stop_err('Error joining the two datasets -> %s' %str(exj))
if mode == "V":
cmdline = "join %s -t ' ' -v 1 -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile)
else:
cmdline = "join %s -t ' ' -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile)
try:
os.system(cmdline)
except:
stop_err('Error joining the two datasets -> %s' %str(exj))
if __name__ == "__main__":
main()
main()
4 changes: 3 additions & 1 deletion tag2peak.pl
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@

GetOptions (
'big'=>\$big,
'minBlockSize:i'=>\$minBlockSize,
'p:f'=>\$pvalueThreshold,
'ss'=>\$separateStrand,
'valley-seeking'=>\$valleySeeking,
Expand Down Expand Up @@ -82,6 +83,7 @@
print "Options:\n";
#print " -e : expression values indicated in the gene bed file\n";
print " -big : big input file\n";
print " -minBlockSize [int] : minimim number of lines to read in each block for a big file ($minBlockSize)\n";
print " -ss : separate the two strands\n";
print " --valley-seeking : find candidate peaks by valley seeking\n";
print " --valley-depth [float] : depth of valley if valley seeking ($valleyDepth)\n";
Expand Down Expand Up @@ -340,7 +342,7 @@
my $geneTagCountTotal = 0;
map {$geneTagCountTotal += $geneTagCountHash{$_}->{'count'}} keys %geneTagCountHash;

Carp::croak "no tags??" if $geneTagCountTotal <= 0;
Carp::croak "No match between tags and gene references? check your chromosome names in your tag file\n" if $geneTagCountTotal <= 0;

print "total number of tags overlapping with specified reions: $geneTagCountTotal\n" if $verbose;

Expand Down

0 comments on commit 7edc906

Please sign in to comment.