From 7edc9069de682dd04729a5d478d28fd26e2d6fae Mon Sep 17 00:00:00 2001 From: Chaolin Zhang Date: Fri, 9 Oct 2020 15:51:53 +0000 Subject: [PATCH] Minor bug fixes modified: bedExt.pl modified: extractPeak.pl modified: joinWrapper.py modified: tag2peak.pl --- bedExt.pl | 4 ++ extractPeak.pl | 4 ++ joinWrapper.py | 120 +++++++++++++++++++++++++------------------------ tag2peak.pl | 4 +- 4 files changed, 72 insertions(+), 60 deletions(-) diff --git a/bedExt.pl b/bedExt.pl index 4fcd072..9f80b1b 100755 --- a/bedExt.pl +++ b/bedExt.pl @@ -80,6 +80,10 @@ { open ($fin, "gunzip -c $in |") || Carp::croak "cannot open file $in to read\n"; } + elsif ($in =~/\.bz2$/) + { + open ($fin, "bunzip2 -c $in |") || Carp::croak "cannot open file $in to read\n"; + } else { open ($fin, "<$in") || Carp::croak "cannot open file $in to read\n"; diff --git a/extractPeak.pl b/extractPeak.pl index 29861d4..ed56b72 100755 --- a/extractPeak.pl +++ b/extractPeak.pl @@ -93,6 +93,10 @@ { open ($fin, "gunzip -c $inWiggleFile | ") || Carp::croak "cannot open file $inWiggleFile to read\n"; } + elsif ($inWiggleFile =~/\.bz2$/) + { + open ($fin, "bunzip2 -c $inWiggleFile | ") || Carp::croak "cannot open file $inWiggleFile to read\n"; + } else { open ($fin, "<$inWiggleFile") || Carp::croak "cannot open file $inWiggleFile to read\n"; diff --git a/joinWrapper.py b/joinWrapper.py index fd75465..98e8537 100755 --- a/joinWrapper.py +++ b/joinWrapper.py @@ -30,72 +30,74 @@ #!/usr/bin/env python #Guruprasad Ananda +#modifications to support python3 by sebastien weyn """ This tool provides the UNIX "join" functionality. """ import sys, os, tempfile, subprocess def stop_err(msg): - sys.stderr.write(msg) - sys.exit() + sys.stderr.write(msg) + sys.exit() def main(): - infile1 = sys.argv[1] - infile2 = sys.argv[2] - field1 = int(sys.argv[3]) - field2 = int(sys.argv[4]) - mode =sys.argv[5] - outfile = sys.argv[6] - - tmpfile1 = tempfile.NamedTemporaryFile() - tmpfile2 = tempfile.NamedTemporaryFile() - - try: - #Sort the two files based on specified fields - os.system("sort -t ' ' -k %d,%d -o %s %s" %(field1, field1, tmpfile1.name, infile1)) - os.system("sort -t ' ' -k %d,%d -o %s %s" %(field2, field2, tmpfile2.name, infile2)) - except Exception, exc: - stop_err( 'Initialization error -> %s' %str(exc) ) - - option = "" - for line in file(tmpfile1.name): - line = line.strip() - if line: - elems = line.split('\t') - for j in range(1,len(elems)+1): - if j == 1: - option = "1.1" - else: - option = option + ",1." + str(j) - break - - #check if join has --version option. BSD join doens't have this option, while GNU join does. - #The return value in the latter case will be 0, and non-zero in the latter case. - ret = subprocess.call('join --version 2>/dev/null', shell=True) - # check if we are a version later than 7 of join. If so, we want to skip - # checking the order since join will raise an error with duplicated items in - # the two files being joined. - if ret == 0: - cl = subprocess.Popen(["join", "--version"], stdout=subprocess.PIPE) - (stdout, _) = cl.communicate() - version_line = stdout.split("\n")[0] - (version, _) = version_line.split()[-1].split(".") - if int(version) >= 7: - flags = "--nocheck-order" - else: - flags = "" - else: - flags = "" + infile1 = sys.argv[1] + infile2 = sys.argv[2] + field1 = int(sys.argv[3]) + field2 = int(sys.argv[4]) + mode =sys.argv[5] + outfile = sys.argv[6] + + tmpfile1 = tempfile.NamedTemporaryFile() + tmpfile2 = tempfile.NamedTemporaryFile() + + try: + #Sort the two files based on specified fields + os.system("sort -t ' ' -k %d,%d -o %s %s" %(field1, field1, tmpfile1.name, infile1)) + os.system("sort -t ' ' -k %d,%d -o %s %s" %(field2, field2, tmpfile2.name, infile2)) + except: + stop_err( 'Initialization error -> %s' %str(exc) ) + + option = "" + with open(tmpfile1.name, 'r') as infile: + for line in infile: + line = line.strip() + if line: + elems = line.split('\t') + for j in range(1,len(elems)+1): + if j == 1: + option = "1.1" + else: + option = option + ",1." + str(j) + break + + #check if join has --version option. BSD join doens't have this option, while GNU join does. + #The return value in the latter case will be 0, and non-zero in the latter case. + ret = subprocess.run('join --version 2>/dev/null', shell=True) + # check if we are a version later than 7 of join. If so, we want to skip + # checking the order since join will raise an error with duplicated items in + # the two files being joined. + if ret.returncode == 0: + cl = subprocess.Popen(["join", "--version"], stdout=subprocess.PIPE) + (stdout, _) = cl.communicate() + version_line = str(stdout).split("\\n")[0] + (version, _) = version_line.split()[-1].split(".") + if int(version) >= 7: + flags = "--nocheck-order" + else: + flags = "" + else: + flags = "" - if mode == "V": - cmdline = "join %s -t ' ' -v 1 -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile) - else: - cmdline = "join %s -t ' ' -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile) - - try: - os.system(cmdline) - except Exception, exj: - stop_err('Error joining the two datasets -> %s' %str(exj)) - + if mode == "V": + cmdline = "join %s -t ' ' -v 1 -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile) + else: + cmdline = "join %s -t ' ' -o %s -1 %d -2 %d %s %s > %s" %(flags, option, field1, field2, tmpfile1.name, tmpfile2.name, outfile) + + try: + os.system(cmdline) + except: + stop_err('Error joining the two datasets -> %s' %str(exj)) + if __name__ == "__main__": - main() + main() diff --git a/tag2peak.pl b/tag2peak.pl index 04401e6..7ebc083 100755 --- a/tag2peak.pl +++ b/tag2peak.pl @@ -52,6 +52,7 @@ GetOptions ( 'big'=>\$big, + 'minBlockSize:i'=>\$minBlockSize, 'p:f'=>\$pvalueThreshold, 'ss'=>\$separateStrand, 'valley-seeking'=>\$valleySeeking, @@ -82,6 +83,7 @@ print "Options:\n"; #print " -e : expression values indicated in the gene bed file\n"; print " -big : big input file\n"; + print " -minBlockSize [int] : minimim number of lines to read in each block for a big file ($minBlockSize)\n"; print " -ss : separate the two strands\n"; print " --valley-seeking : find candidate peaks by valley seeking\n"; print " --valley-depth [float] : depth of valley if valley seeking ($valleyDepth)\n"; @@ -340,7 +342,7 @@ my $geneTagCountTotal = 0; map {$geneTagCountTotal += $geneTagCountHash{$_}->{'count'}} keys %geneTagCountHash; - Carp::croak "no tags??" if $geneTagCountTotal <= 0; + Carp::croak "No match between tags and gene references? check your chromosome names in your tag file\n" if $geneTagCountTotal <= 0; print "total number of tags overlapping with specified reions: $geneTagCountTotal\n" if $verbose;