pachterlab · myrefoli · Aug 20, 2017 · Aug 24, 2017 · Dec 14, 2017
diff --git a/src/#KmerIndex.cpp# b/src/#KmerIndex.cpp#
diff --git a/src/Bootstrap.cpp b/src/Bootstrap.cpp
@@ -82,10 +82,13 @@ void BootstrapWorker::operator() (){
       // release write lock
     } else {
       // can write out plaintext in parallel
+
+      std::vector<double> tpm = counts_to_tpm(res.alpha_, pool_.eff_lens_);
+
       plaintext_writer(pool_.opt_.output + "/bs_abundance_" +
           std::to_string(cur_id) + ".tsv",
           pool_.index_.target_names_, res.alpha_,
-          pool_.eff_lens_, pool_.index_.target_lens_);
+		       pool_.eff_lens_, pool_.index_.target_lens_, tpm);
     }
   }
 }
diff --git a/src/Bootstrap.h b/src/Bootstrap.h
@@ -10,6 +10,7 @@
 #include "EMAlgorithm.h"
 #include "Multinomial.hpp"
 #include "H5Writer.h"
+#include "PlaintextWriter.h"
 
 class Bootstrap {
     // needs:

diff --git a/src/H5Writer.cpp b/src/H5Writer.cpp
@@ -188,5 +188,7 @@ void H5Converter::rw_from_counts(hid_t group_id, const std::string& count_name,
   std::vector<double> alpha;
   read_dataset(group_id, count_name.c_str(), alpha);
 
-  plaintext_writer(out_fname, targ_ids_, alpha, eff_lengths_, lengths_);
+  std::vector<double> tpm = counts_to_tpm(alpha, eff_lengths_);
+
+  plaintext_writer(out_fname, targ_ids_, alpha, eff_lengths_, lengths_, tpm);
 }
diff --git a/src/KmerIndex.cpp b/src/KmerIndex.cpp
@@ -87,6 +87,7 @@ void KmerIndex::BuildTranscripts(const ProgramOptions& opt) {
   int countNonNucl = 0;
   int countUNuc = 0;
   int polyAcount = 0;
+  bool notAdded = true;
 
   for (auto& fasta : opt.transfasta) {
     fp = gzopen(fasta.c_str(), "r");
@@ -129,6 +130,17 @@ void KmerIndex::BuildTranscripts(const ProgramOptions& opt) {
         name = name.substr(0,p);
       }
 
+      // Mark one record of the diff FASTA by appending a fixed tag to its name.
+      // notAdded is cleared on the first hit, so no later record is tagged.
+      const std::string tag = "rrDK961j";
+      const bool tagThisRecord = notAdded
+                                 && opt.constructed_diff_index
+                                 && fasta == opt.diff_index;
+      if (tagThisRecord) {
+        name += tag;
+        notAdded = false;
+      }
+
       if (unique_names.find(name) != unique_names.end()) {
         if (!opt.make_unique) {
           std::cerr << "Error: repeated name in FASTA file " << fasta << "\n" << name << "\n\n" << "Run with --make-unique to replace repeated names with unique names" << std::endl;

diff --git a/src/PlaintextWriter.cpp b/src/PlaintextWriter.cpp
@@ -29,7 +29,8 @@ void plaintext_writer(
     const std::vector<std::string>& targ_ids,
     const std::vector<double>& alpha,
     const std::vector<double>& eff_lens,
-    const std::vector<int>& lens
+    const std::vector<int>& lens,
+    const std::vector<double>& tpm
     ){
 
   std::ofstream of;
@@ -41,8 +42,6 @@ void plaintext_writer(
     exit(1);
   }
 
-  auto tpm = counts_to_tpm(alpha, eff_lens);
-
   of << "target_id" << "\t"
     /* << "kallisto_id" << "\t" */
     << "length" << "\t"

diff --git a/src/PlaintextWriter.h b/src/PlaintextWriter.h
@@ -11,12 +11,16 @@
 
 #include "KmerIndex.h"
 
+std::vector<double> counts_to_tpm(const std::vector<double>& est_counts,
+				  const std::vector<double>& eff_lens);
+
 void plaintext_writer(
     const std::string& out_name,
     const std::vector<std::string>& targ_ids,
     const std::vector<double>& alpha,
     const std::vector<double>& eff_lens,
-    const std::vector<int>& lens
+    const std::vector<int>& lens,
+    const std::vector<double>& tpm
     );
 
 std::string to_json(const std::string& id, const std::string& val, bool quote,

diff --git a/src/common.h b/src/common.h
@@ -9,7 +9,11 @@
 struct ProgramOptions {
   bool verbose;
   int threads;
+  // new options: diff_index, constructed_diff_index, analyzing_diff (index itself is pre-existing)
   std::string index;
+  std::string diff_index;  // path given with -d/--diff on the index command
+  bool constructed_diff_index;  // true once -d/--diff has been parsed
+  bool analyzing_diff;  // true when --different was passed
   int k;
   int iterations;
   std::string output;
@@ -51,6 +55,10 @@ ProgramOptions() :
   sd(0.0),
   min_range(1),
   bootstrap(0),
+  //new program options
+  constructed_diff_index(false),
+  analyzing_diff(false),
+  //new program options   
   batch_mode(false),
   plaintext(false),
   write_index(false),

diff --git a/src/main.cpp b/src/main.cpp
@@ -33,14 +33,15 @@ using namespace std;
 void ParseOptionsIndex(int argc, char **argv, ProgramOptions& opt) {
   int verbose_flag = 0;
   int make_unique_flag = 0;
-  const char *opt_string = "i:k:";
+  const char *opt_string = "i:k:d:";
   static struct option long_options[] = {
     // long args
     {"verbose", no_argument, &verbose_flag, 1},
     {"make-unique", no_argument, &make_unique_flag, 1},
     // short args
     {"index", required_argument, 0, 'i'},
     {"kmer-size", required_argument, 0, 'k'},
+    {"diff", required_argument, 0, 'd'},
     {0,0,0,0}
   };
   int c;
@@ -63,6 +64,12 @@ void ParseOptionsIndex(int argc, char **argv, ProgramOptions& opt) {
       stringstream(optarg) >> opt.k;
       break;
     }
+    case 'd': {  // -d/--diff FILE: remember the diff file and flag that one was supplied
+      opt.diff_index = optarg;
+      opt.constructed_diff_index = true;
+      opt.make_unique = true;  // NOTE(review): presumably forced so names repeated between the diff file and the other FASTA inputs do not hit the repeated-name error — confirm
+      break;
+    }
     default: break;
     }
   }
@@ -77,6 +84,10 @@ void ParseOptionsIndex(int argc, char **argv, ProgramOptions& opt) {
   for (int i = optind; i < argc; i++) {
     opt.transfasta.push_back(argv[i]);
   }
+
+  if (opt.constructed_diff_index) {  // the diff file goes after every positional FASTA, i.e. last in opt.transfasta
+    opt.transfasta.push_back(opt.diff_index);
+  }  
 }
 
 void ParseOptionsInspect(int argc, char **argv, ProgramOptions& opt) {
@@ -123,6 +134,7 @@ void ParseOptionsEM(int argc, char **argv, ProgramOptions& opt) {
   int bias_flag = 0;
   int pbam_flag = 0;
   int fusion_flag = 0;
+  int diff_flag = 0;
 
   const char *opt_string = "t:i:l:s:o:n:m:d:b:";
   static struct option long_options[] = {
@@ -137,6 +149,7 @@ void ParseOptionsEM(int argc, char **argv, ProgramOptions& opt) {
     {"pseudobam", no_argument, &pbam_flag, 1},
     {"fusion", no_argument, &fusion_flag, 1},
     {"seed", required_argument, 0, 'd'},
+    {"different", no_argument, &diff_flag, 1},
     // short args
     {"threads", required_argument, 0, 't'},
     {"index", required_argument, 0, 'i'},
@@ -204,6 +217,10 @@ void ParseOptionsEM(int argc, char **argv, ProgramOptions& opt) {
     opt.files.push_back(argv[i]);
   }
 
+  if (diff_flag) {
+    opt.analyzing_diff = true;
+  }
+
   if (verbose_flag) {
     opt.verbose = true;
   }
@@ -489,6 +506,19 @@ bool CheckOptionsIndex(ProgramOptions& opt) {
     cerr << "Error: need to specify kallisto index name" << endl;
     ret = false;
   }
+  if(opt.constructed_diff_index) {  // -d/--diff was given: the diff file must be named and must exist
+    if(opt.diff_index.empty()) {
+      cerr << "Error: diff file missing" << endl;
+      ret = false;
+    } else {
+      struct stat stFileInfo;
+      auto intStat = stat(opt.diff_index.c_str(), &stFileInfo);  // non-zero when the path cannot be stat'ed
+      if(intStat != 0) {
+        cerr << "Error: diff file not found " << opt.diff_index << endl;  // trailing space keeps the path apart from the message
+        ret = false;
+      }
+    }
+  }
 
   return ret;
 }
@@ -946,6 +976,7 @@ void usageIndex() {
        << "-i, --index=STRING          Filename for the kallisto index to be constructed " << endl << endl
        << "Optional argument:" << endl
        << "-k, --kmer-size=INT         k-mer (odd) length (default: 31, max value: " << (Kmer::MAX_K-1) << ")" << endl
+       << "-d, --diff=STRING           File of the different database " << endl
        << "    --make-unique           Replace repeated target names with unique names" << endl
        << endl;
 
@@ -992,7 +1023,8 @@ void usageEM(bool valid_input = true) {
        << "                              (default: -l, -s values are estimated from paired" << endl
        << "                               end data, but are required when using --single)" << endl
        << "-t, --threads=INT             Number of threads to use (default: 1)" << endl
-       << "    --pseudobam               Output pseudoalignments in SAM format to stdout" << endl;
+       << "    --pseudobam               Output pseudoalignments in SAM format to stdout" << endl
+       << "    --different               Perform synthetic depletion on different file" << endl;
 
 }
 
@@ -1183,8 +1215,36 @@ int main(int argc, char *argv[]) {
             start_time,
             call);
 
+	std::vector<double> tpm = counts_to_tpm(em.alpha_, em.eff_lens_);
+
+	if (opt.analyzing_diff) {  // also write abundance2.tsv: the rows from the tagged target to the end
+	  std::vector<std::string> name2;
+	  std::vector<double> alpha2, eff_lens2, tpm2;
+	  std::vector<int> target_lens2;
+	  // Index of the last target whose name contains the tag; n_targets means "not found".
+	  const size_t n_targets = em.target_names_.size();
+	  size_t position = n_targets;
+	  for (size_t i = 0; i < n_targets; i++) {
+	    if (em.target_names_[i].find("rrDK961j") != std::string::npos) position = i;
+	  }
+	  if (position == n_targets) {
+	    // Warn rather than assert(): aborting here would skip the abundance.tsv write below.
+	    std::cerr << "Warning: no target name contains rrDK961j; abundance2.tsv will have no rows" << std::endl;
+	  } else {
+	    std::cerr << "Position: " << position << std::endl;  // stderr: --pseudobam puts SAM on stdout
+	  }
+	  for (size_t j = position; j < n_targets; j++) {
+	    name2.push_back(em.target_names_[j]);
+	    alpha2.push_back(em.alpha_[j]);
+	    eff_lens2.push_back(em.eff_lens_[j]);
+	    target_lens2.push_back(index.target_lens_[j]);
+	    tpm2.push_back(tpm[j]);
+	  }
+	  plaintext_writer(opt.output + "/abundance2.tsv", name2, alpha2, eff_lens2, target_lens2, tpm2);
+	}
+
         plaintext_writer(opt.output + "/abundance.tsv", em.target_names_,
-            em.alpha_, em.eff_lens_, index.target_lens_);
+			 em.alpha_, em.eff_lens_, index.target_lens_, tpm);
 
         if (opt.bootstrap > 0) {
           auto B = opt.bootstrap;
@@ -1218,8 +1278,11 @@ int main(int argc, char *argv[]) {
               if (!opt.plaintext) {
                 writer.write_bootstrap(res, b);
               } else {
+
+		std::vector<double> tpm = counts_to_tpm(res.alpha_, em.eff_lens_);
+
                 plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
-                    em.target_names_, res.alpha_, em.eff_lens_, index.target_lens_);
+				 em.target_names_, res.alpha_, em.eff_lens_, index.target_lens_, tpm);
               }
             }
           }
@@ -1291,9 +1354,11 @@ int main(int argc, char *argv[]) {
               std::string(std::to_string(index.INDEX_VERSION)),
               start_time,
               call);
+
+	  std::vector<double> tpm = counts_to_tpm(em.alpha_, em.eff_lens_);
 
           plaintext_writer(opt.output + "/abundance.tsv", em.target_names_,
-              em.alpha_, em.eff_lens_, index.target_lens_);
+			   em.alpha_, em.eff_lens_, index.target_lens_, tpm);
         }
 
         if (opt.bootstrap > 0) {
@@ -1328,8 +1393,10 @@ int main(int argc, char *argv[]) {
               if (!opt.plaintext) {
                 writer.write_bootstrap(res, b);
               } else {
+		std::vector<double> tpm = counts_to_tpm(res.alpha_, em.eff_lens_);
+
                 plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
-                    em.target_names_, res.alpha_, em.eff_lens_, index.target_lens_);
+				 em.target_names_, res.alpha_, em.eff_lens_, index.target_lens_, tpm);
               }
             }
           }