Skip to content

Commit

Permalink
better XSD integration
Browse files Browse the repository at this point in the history
  • Loading branch information
Henric Zazzi committed Apr 23, 2014
1 parent c5b5f94 commit d3abcea
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 70 deletions.
66 changes: 32 additions & 34 deletions pout_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,43 +13,57 @@ namespace poutXML {
probability_t=s;
}
//------------------------------------------------------------------------------
void psm_ids_pimpl::pre(PercolatorOutI* percolator,string* peptide_q_value,string* peptide_pep,bool* peptide_decoy,
// Setup hook: remembers the probability_t parser whose probability_t member
// q_value() and pep() copy from.
void peptide_pimpl::pre(probability_t_pimpl* probability_p) {
// The parameter shadows the member of the same name, hence the explicit this->.
this->probability_p=probability_p;
}
//------------------------------------------------------------------------------
// Copies the value currently held by the probability_t parser into pq_value.
// NOTE(review): presumably invoked by the generated peptide_pskel parser once
// the peptide's q_value element has been parsed - confirm against the skeleton.
void peptide_pimpl::q_value() {
pq_value=probability_p->probability_t;
}
//------------------------------------------------------------------------------
// Copies the value currently held by the probability_t parser into ppep.
// NOTE(review): presumably invoked by the generated peptide_pskel parser once
// the peptide's pep element has been parsed - confirm against the skeleton.
void peptide_pimpl::pep() {
ppep=probability_p->probability_t;
}
//------------------------------------------------------------------------------
// Records the peptide's decoy flag in pdecoy. psm_ids_pimpl::psm_id() passes
// this flag to PercolatorOutI::checkDecoy() before storing any peptide value.
void peptide_pimpl::decoy(bool decoy) {
pdecoy=decoy;
}
//------------------------------------------------------------------------------
void psm_ids_pimpl::pre(PercolatorOutI* percolator,peptide_pimpl* peptide_p,
boost::unordered_map<PercolatorOutFeatures, string, PercolatorOutFeatures>& pout_values) {
this->percolator=percolator;
this->peptide_q_value=peptide_q_value;
*this->peptide_q_value="";
this->peptide_pep=peptide_pep;
*this->peptide_pep="";
this->peptide_decoy=peptide_decoy;
this->peptide_p=peptide_p;
this->peptide_p->pq_value="";
this->peptide_p->ppep="";
this->pout_values=&pout_values;
}
//------------------------------------------------------------------------------
void psm_ids_pimpl::post_psm_ids() {
*peptide_pep="";
*peptide_q_value="";
peptide_p->pq_value="";
peptide_p->ppep="";
}
//------------------------------------------------------------------------------
void psm_ids_pimpl::psm_id(const ::std::string& psm_id) {
string psmid,psmidfile;

if (!percolator->checkDecoy(*peptide_decoy))
if (!percolator->checkDecoy(peptide_p->pdecoy))
return;
psmid=percolator->convertPSMID(psm_id);
if (psmid.length()==0)
THROW_ERROR_VALUE(PRINT_TEXT::WRONG_FORMAT_PSM,psm_id);
psmidfile=percolator->convertPSMIDFileName(psm_id);
if (psmidfile.length()==0)
THROW_ERROR_VALUE(PRINT_TEXT::NO_UNIQUE_MZID_FILE,psm_id);
if (peptide_pep->length()>0)
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::PEPTIDE_PEP)]=*peptide_pep;
if (peptide_q_value->length()>0)
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::PEPTIDE_Q_VALUE)]=*peptide_q_value;
if (peptide_p->ppep.length()>0)
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::PEPTIDE_PEP)]=peptide_p->ppep;
if (peptide_p->pq_value.length()>0)
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::PEPTIDE_Q_VALUE)]=peptide_p->pq_value;
}
//------------------------------------------------------------------------------
void psm_pimpl::pre(PercolatorOutI* percolator,string* probability_t,
void psm_pimpl::pre(PercolatorOutI* percolator,probability_t_pimpl* probability_p,
boost::unordered_map<PercolatorOutFeatures, string, PercolatorOutFeatures>& pout_values) {
this->percolator=percolator;
this->probability_t=probability_t;
this->probability_p=probability_p;
this->pout_values=&pout_values;
}
//------------------------------------------------------------------------------
Expand All @@ -60,17 +74,17 @@ namespace poutXML {
//------------------------------------------------------------------------------
void psm_pimpl::q_value() {
if (psm_decoy)
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::Q_VALUE)]=global::to_string(*probability_t);
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::Q_VALUE)]=probability_p->probability_t;
}
//------------------------------------------------------------------------------
void psm_pimpl::pep() {
if (psm_decoy)
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::PEP)]=global::to_string(*probability_t);
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::PEP)]=probability_p->probability_t;
}
//------------------------------------------------------------------------------
void psm_pimpl::p_value() {
if (psm_decoy)
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::P_VALUE)]=global::to_string(*probability_t);
(*pout_values)[PercolatorOutFeatures(psmidfile,psmid,PERCOLATOR_PARAM::P_VALUE)]=probability_p->probability_t;
}
//------------------------------------------------------------------------------
void psm_pimpl::psm_id(const ::std::string& psm_id) {
Expand All @@ -85,21 +99,5 @@ namespace poutXML {
// Stores the result of percolator->checkDecoy(decoy) in psm_decoy.
// q_value(), pep() and p_value() only write to pout_values when psm_decoy is
// true, so this result gates whether the PSM's values are recorded.
void psm_pimpl::decoy(bool decoy) {
psm_decoy=percolator->checkDecoy(decoy);
}
//------------------------------------------------------------------------------
void peptide_pimpl::pre(string* probability_t) {
this->probability_t=probability_t;
}
//------------------------------------------------------------------------------
void peptide_pimpl::q_value() {
peptide_q_value=*probability_t;
}
//------------------------------------------------------------------------------
void peptide_pimpl::pep() {
peptide_pep=*probability_t;
}
//------------------------------------------------------------------------------
void peptide_pimpl::decoy(bool decoy) {
peptide_decoy=decoy;
}
//------------------------------------------------------------------------------
}
36 changes: 17 additions & 19 deletions pout_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,27 @@ namespace poutXML {

virtual void _characters (const ::xml_schema::ro_string&);
};
//------------------------------------------------------------------------------
// Parser implementation for a peptide element. It keeps the peptide-level
// q-value, PEP and decoy flag so that psm_ids_pimpl can read them through its
// peptide_p pointer while handling the peptide's psm_id entries.
class peptide_pimpl: public virtual peptide_pskel {
public:
// Parser whose probability_t member q_value() and pep() copy from; set by pre().
probability_t_pimpl* probability_p;
// Peptide q-value; reset to "" by psm_ids_pimpl::pre() and post_psm_ids().
string pq_value;
// Peptide PEP; reset to "" by psm_ids_pimpl::pre() and post_psm_ids().
string ppep;
// Decoy flag as passed to decoy(); not initialised anywhere in the code shown.
bool pdecoy;

// Stores the probability_t parser to read values from.
virtual void pre(probability_t_pimpl* probability_p);
// Copies probability_p->probability_t into pq_value.
virtual void q_value();
// Copies probability_p->probability_t into ppep.
virtual void pep();
// Stores its argument in pdecoy.
virtual void decoy(bool);
};
//------------------------------------------------------------------------------
class psm_ids_pimpl: public virtual psm_ids_pskel {
public:
PercolatorOutI* percolator;
peptide_pimpl* peptide_p;
boost::unordered_map<PercolatorOutFeatures, string, PercolatorOutFeatures>* pout_values;
string* peptide_q_value;
string* peptide_pep;
bool* peptide_decoy;

virtual void pre(PercolatorOutI* percolator,string* peptide_q_value,string* peptide_pep,bool* peptide_decoy,
virtual void pre(PercolatorOutI* percolator,peptide_pimpl* peptide_p,
boost::unordered_map<PercolatorOutFeatures, string, PercolatorOutFeatures>& pout_values);
virtual void psm_id(const ::std::string&);
virtual void post_psm_ids();
Expand All @@ -39,12 +50,12 @@ namespace poutXML {
class psm_pimpl: public virtual psm_pskel {
public:
PercolatorOutI* percolator;
string* probability_t;
probability_t_pimpl* probability_p;
boost::unordered_map<PercolatorOutFeatures, string, PercolatorOutFeatures>* pout_values;
string psmid,psmidfile;
bool psm_decoy;

virtual void pre(PercolatorOutI* percolator,string* probability_t,
virtual void pre(PercolatorOutI* percolator,probability_t_pimpl* probability_p,
boost::unordered_map<PercolatorOutFeatures, string, PercolatorOutFeatures>& pout_values);
virtual void svm_score(double);
virtual void q_value();
Expand All @@ -53,19 +64,6 @@ namespace poutXML {
virtual void psm_id (const ::std::string&);
virtual void decoy(bool);
};
//------------------------------------------------------------------------------
class peptide_pimpl: public virtual peptide_pskel {
public:
string* probability_t;
string peptide_q_value;
string peptide_pep;
bool peptide_decoy;

virtual void pre(string* probability_t);
virtual void q_value();
virtual void pep();
virtual void decoy(bool);
};
//------------------------------------------------------------------------------
// Parser implementation for aa_seq_t: adds no members of its own and only
// combines aa_seq_t_pskel with ::xml_schema::string_pimpl.
class aa_seq_t_pimpl: public virtual aa_seq_t_pskel,public ::xml_schema::string_pimpl {};
//------------------------------------------------------------------------------
Expand Down
41 changes: 24 additions & 17 deletions xmlio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,42 +112,49 @@ string MzIDIO::setOutputFileName(int mzidfilenameid) {
bool MzIDIO::insertMZIDValues(boost::unordered_map<PercolatorOutFeatures, string, PercolatorOutFeatures>& pout_values) {
FILE *fpr,*fpw;
string mzidname;
char s1[global::MAX_CHAR_SIZE],s2[global::MAX_CHAR_SIZE];
int i1,vi1,n;
char s1[global::MAX_CHAR_SIZE],psmid[global::MAX_CHAR_SIZE];
int i1,vi1,n,xmlindent;

n=0;
psmid[0]=0;
try {
for (vi1=0; vi1<filename.size(); vi1++) {
mzidname=boost::lexical_cast<boost::filesystem::path>(filename[vi1]).stem().string();
fpr=fopen(filename[vi1].c_str(),"r");
fpw=fopen(setOutputFileName(vi1).c_str(),"w");
while (fgets(s1,global::MAX_CHAR_SIZE,fpr)!=NULL) {
fprintf(fpw,"%s",s1);
if (strstr(s1," id=\"")!=NULL && strstr(s1,"<SpectrumIdentificationItem ")!=NULL) {
strcpy(s2,strstr(s1," id=\"")+5);
*strchr(s2,'\"')=0;
if (strstr(s1,MZID_PARAM::END_INSERT_TAG)!=NULL) {
if (strlen(psmid)==0)
THROW_ERROR(PRINT_TEXT::CANNOT_INSERT);
for (i1=0; i1<ARRAYSIZE(MZID_PARAM::ELEMENT_DATA::ELEMENTS); i1++) {
if (pout_values.find(PercolatorOutFeatures(mzidname,s2,i1))==pout_values.end())
if (pout_values.find(PercolatorOutFeatures(mzidname,psmid,i1))==pout_values.end())
continue;
n++;
switch (MZID_PARAM::ELEMENT_DATA::ELEMENTS[i1]) {
case MZID_PARAM::CVPARAM: {
fprintf(fpw,"<cvparam accession=\"%s\" cvref==\"%s\" name=\"%s\" value=\"%s\"/>\n",
fprintf(fpw,MZID_PARAM::CVPARAM_TAG,string(xmlindent,' ').c_str(),
MZID_PARAM::ELEMENT_DATA::ACCESSIONS[i1].c_str(),
MZID_PARAM::ELEMENT_DATA::CVREFS[i1].c_str(),
MZID_PARAM::ELEMENT_DATA::NAMES[i1].c_str(),
global::to_string(pout_values[PercolatorOutFeatures(mzidname,s2,i1)]).c_str());
pout_values[PercolatorOutFeatures(mzidname,psmid,i1)].c_str());
break;
}
case MZID_PARAM::USERPARAM: {
fprintf(fpw,"<userparam name=\"%s\" value=\"%s\"/>\n",
fprintf(fpw,MZID_PARAM::USERPARAM_TAG,string(xmlindent,' ').c_str(),
MZID_PARAM::ELEMENT_DATA::NAMES[i1].c_str(),
global::to_string(pout_values[PercolatorOutFeatures(mzidname,s2,i1)]).c_str());
pout_values[PercolatorOutFeatures(mzidname,psmid,i1)].c_str());
break;
}
}
pout_values.erase(PercolatorOutFeatures(mzidname,s2,i1));
pout_values.erase(PercolatorOutFeatures(mzidname,psmid,i1));
}
psmid[0]=0;
}
xmlindent=strspn(s1," ");
fprintf(fpw,"%s",s1);
if (strstr(s1,MZID_PARAM::PSMID_TAG)!=NULL && strstr(s1,MZID_PARAM::START_INSERT_TAG)!=NULL) {
strcpy(psmid,strstr(s1,MZID_PARAM::PSMID_TAG)+strlen(MZID_PARAM::PSMID_TAG));
*strchr(psmid,'\"')=0;
}
}
fclose(fpr);
Expand Down Expand Up @@ -217,9 +224,9 @@ bool PercolatorOutI::getPoutValues(boost::unordered_map<PercolatorOutFeatures, s
poutXML::probability_t_pimpl probability_t_p;

try {
psm_p.pre(this,&probability_t_p.probability_t,pout_values);
psm_ids_p.pre(this,&peptide_p.peptide_q_value,&peptide_p.peptide_pep,&peptide_p.peptide_decoy,pout_values);
peptide_p.pre(&probability_t_p.probability_t);
psm_p.pre(this,&probability_t_p,pout_values);
psm_ids_p.pre(this,&peptide_p,pout_values);
peptide_p.pre(&probability_t_p);
pout_p.parsers (process_info_p,psms_p,peptides_p,proteins_p,
string_p,u_short_p,u_short_p);
psms_p.parsers (psm_p);
Expand All @@ -229,8 +236,8 @@ bool PercolatorOutI::getPoutValues(boost::unordered_map<PercolatorOutFeatures, s
peptide_p.parsers(double_p,probability_t_p,probability_t_p,double_p,double_p,retentionTime_p,
string_p,probability_t_p,psm_ids_p,aa_seq_t_p,boolean_p);
psm_ids_p.parsers(string_p);
xml_schema::document doc_p (pout_p,PERCOLATOR_PARAM::SCHEMA_NAME,PERCOLATOR_PARAM::HEAD_TAG);
doc_p.parse (filename.c_str(),validatexml);
xml_schema::document doc_p(pout_p,PERCOLATOR_PARAM::SCHEMA_NAME,PERCOLATOR_PARAM::HEAD_TAG);
doc_p.parse(filename.c_str(),validatexml);
clog << boost::format(PRINT_TEXT::TOTAL_READ) % pout_values.size() << endl;
return true;
}
Expand Down
5 changes: 5 additions & 0 deletions xmlio.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ using namespace std;
namespace MZID_PARAM {
const char SCHEMA_NAME[]="http://psidev.info/psi/pi/mzIdentML/1.1";
const char SCHEMA[]="mzIdentML1.1.0.xsd";
const char START_INSERT_TAG[]="<SpectrumIdentificationItem ";
const char END_INSERT_TAG[]="</SpectrumIdentificationItem>";
const char PSMID_TAG[]=" id=\"";
// printf-style template for one inserted cvparam line. Placeholders, in order:
// indentation string, accession, cvref, name, value.
// Fix: the attribute was written as cvref=="..." (doubled '='), which is not
// well-formed XML; it is now cvref="...".
// NOTE(review): mzIdentML 1.1 appears to spell the element and attribute as
// cvParam / cvRef (case-sensitive) - confirm against the schema before relying
// on validation of the output.
const char CVPARAM_TAG[]="%s<cvparam accession=\"%s\" cvref=\"%s\" name=\"%s\" value=\"%s\"/>\n";
const char USERPARAM_TAG[]="%s<userparam name=\"%s\" value=\"%s\"/>\n";
enum ELEMENT_TYPE { USERPARAM,CVPARAM };

namespace ELEMENT_DATA {
Expand Down

0 comments on commit d3abcea

Please sign in to comment.