/*                                                                                                 */
/*  muxQuant - relative quantitation after alignment of MS and MS/MS datasets                      */
/*                                                                                                 */
/* Copyright (c) Magnus Palmblad 2007 (uses the ISB RAMP library for reading mzXML files and parts */ 
/* from Pep3D for accessing data in mzXML)                                                         */ 
/*                                                                                                 */
/* This program is free software; you can redistribute it and/or modify it under the terms of the  */
/* GNU General Public License as published by the Free Software Foundation; either version 2 of    */
/* the License, or (at your option) any later version.                                             */
/*                                                                                                 */
/* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;       */
/* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.       */
/* See the GNU General Public License for more details.                                            */
/*                                                                                                 */
/* You should have received a copy of the GNU General Public License along with this program; if   */
/* not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA       */
/* 02111-1307, USA.                                                                                */
/*                                                                                                 */
/* Contact information: magnus.palmblad@gmail.com                                                  */
/*                                                                                                 */
/*                                                                                                 */
/* compile with gcc -o muxQuant base64.c ramp.c muxQuant.c -I. -lm -lz -lfftw3 -lgsl               */
/*                                                                                                 */
/* TODO list (suggested improvements)                                                              */
/* 1) clean up code (change variable names, move code to main(), ...                               */
/* 2) improved support for search engines other than Mascot                                        */
/* 3) interpolation to get better mass accuracy in MS (presently "max" peak picking)               */
/* 4) add support for C-13 isotopic labeling (reserved flag -C, function not implemented yet)      */
/*                                                                                                 */


#include <stdio.h>
#include <stdlib.h>  
#include <ctype.h>
#include <string.h>
#include <math.h>
#include <fftw3.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_multifit.h>
#include "ramp.h"
#include "base64.h"

#define HPLUS_MASS 1.00727646688
#define AMINO_ACIDS "ARNDCEQGHILKMFPSTWYV"
#define MAX_PEPTIDE_LENGTH 300
#define N15 0.997035 /* difference between N-15 and N-14 */
#define MAX_BREAKPOINTS 12
#define MAX_MULTIPLEX 10
#define WINDOW_WIDTH 40
#define MS_SCAN_WINDOW 100
#define MAX_MASS 8192


/* mzXML read functions adopted from Pep3D */

typedef struct {
  int size;
  double * xval;
  double * yval;
} spectStrct;

void freeSpectStrct(spectStrct spectrum)
{
  free(spectrum.xval);
  free(spectrum.yval);

  return;
}

ramp_fileoffset_t *pScanIndex;
int iLastScan;
struct ScanHeaderStruct scanHeader;
struct RunHeaderStruct runHeader;
ramp_fileoffset_t indexOffset;
spectStrct *spects;
double *wghs;
int spectNum;
long i;
RAMPREAL *pPeaks;
int n, n_MS_spectra=0;
RAMPFILE *pFI;

int getMsSpect(spectStrct *msSpect, RAMPFILE *pFI, int scanNum[2])
{
  void getCmbSpect(spectStrct *cmbSpect, int spectNum, spectStrct *spects, double *wghs);

  msSpect->size = -1;

  // initialize
  scanNum[0] = scanNum[0] > 1 ? scanNum[0] : 1;
  scanNum[1] = scanNum[1] < iLastScan ? scanNum[1] : iLastScan;
  spectNum = scanNum[1] - scanNum[0] + 1;
  if(spectNum < 1){
    printf("invalid scan number: %d-%d (full scan range: 1-%d)\n",scanNum[0], scanNum[1], iLastScan);
    fflush(stdout);
    free (pScanIndex);
    return -1;    
  }

  spects = (spectStrct *) calloc(spectNum, sizeof(spectStrct));
  spectNum = 0;
  for (i = scanNum[0]; i <= scanNum[1]; ++i) 
    {
      if((scanHeader.msLevel==1)&&(scanHeader.peaksCount>0)) /* MS ? */
	{                         
	  spects[spectNum].size = scanHeader.peaksCount;
	  spects[spectNum].xval = (double *) calloc(spects[spectNum].size, sizeof(double));
	  spects[spectNum].yval = (double *) calloc(spects[spectNum].size, sizeof(double));
	  
	  pPeaks = readPeaks (pFI, pScanIndex[i]);
	  
	  spects[spectNum].size = 0;
	  n = 0;
	  while (pPeaks[n] != -1)
	    {
	      spects[spectNum].xval[spects[spectNum].size] = pPeaks[n];
	      n++;
	      spects[spectNum].yval[spects[spectNum].size] = pPeaks[n];
	      n++;
	      ++(spects[spectNum].size);
	    }
	  free (pPeaks);
	  n_MS_spectra++;
	  if(spects[spectNum].size > 0) ++spectNum; 
	  else freeSpectStrct(spects[spectNum]);
	}
      
    } 
  
  if(spectNum > 0) {
    wghs = (double *) calloc(spectNum, sizeof(double));
    for (i = 0; i < spectNum; ++i)
      wghs[i] = 1.;
    getCmbSpect(msSpect, spectNum, spects, wghs);
    free(wghs);
  }
  else 
    {
      // printf("cannot find an MS spectrum\n"); fflush(stdout);
      for (i = 0; i < spectNum; ++i) freeSpectStrct(spects[i]);
      free(spects);
      return -1;
    }
  
  for (i = 0; i < spectNum; ++i) freeSpectStrct(spects[i]);
  free(spects);
  
  return 0;
}


int getCidSpect(double *mz, double *et, spectStrct *cidSpect, RAMPFILE *pFI, int scanNum[2])
{
  void getCmbSpect(spectStrct *cmbSpect,int spectNum, spectStrct *spects, double *wghs);

  cidSpect->size = -1;

  scanNum[0] = scanNum[0] > 1 ? scanNum[0] : 1;
  scanNum[1] = scanNum[1] < iLastScan ? scanNum[1] : iLastScan;
  spectNum = scanNum[1] - scanNum[0] + 1;
  if(spectNum < 1){
    printf("invalid scan number: %d-%d (full scan range: 1-%d)\n",scanNum[0], scanNum[1], iLastScan);
    fflush(stdout);
    free (pScanIndex);
    return -1;    
  }
  spects = (spectStrct *) calloc(spectNum, sizeof(spectStrct));
  *mz = 0.;
  *et = 0.;
  spectNum = 0;
  for (i = scanNum[0]; i <= scanNum[1]; ++i) 
    {
      if((scanHeader.msLevel==2)&&(scanHeader.peaksCount>0)) 
	{                         
	  *mz += scanHeader.precursorMZ;
	  *et += scanHeader.retentionTime/60;
	  
	  spects[spectNum].size = scanHeader.peaksCount;
	  spects[spectNum].xval = (double *) calloc(spects[spectNum].size, sizeof(double));
	  spects[spectNum].yval = (double *) calloc(spects[spectNum].size, sizeof(double));
	  
	  pPeaks = readPeaks (pFI, pScanIndex[i]);
	  
	  spects[spectNum].size = 0;
	  n = 0;
	  while (pPeaks[n] != -1)
	    {
	      spects[spectNum].xval[spects[spectNum].size] = pPeaks[n];
	      n++;
	      spects[spectNum].yval[spects[spectNum].size] = pPeaks[n];
	      n++;
	      ++(spects[spectNum].size);
	    }
	  free (pPeaks);
	  
	  if(spects[spectNum].size > 0) ++spectNum;
	  else freeSpectStrct(spects[spectNum]);
	} 
      
    } 
  
  if(spectNum > 0) {
    *mz /= spectNum;
    *et /= spectNum;
    wghs = (double *) calloc(spectNum, sizeof(double));
    for (i = 0; i < spectNum; ++i)
      wghs[i] = 1.;
    getCmbSpect(cidSpect, spectNum, spects, wghs);
    free(wghs);
  }
  else 
    {
      printf("cannot find an MS/MS spectrum\n"); fflush(stdout); 
      for (i = 0; i < spectNum; ++i) freeSpectStrct(spects[i]);
      free(spects);
      return -1;
    }
  
  for (i = 0; i < spectNum; ++i) freeSpectStrct(spects[i]);
  free(spects);
  
  return 0;
}


void getCmbSpect(spectStrct *cmbSpect, int spectNum, spectStrct *spects, double *wghs)
{
  void copySpectStrct(spectStrct * tgtSpect, spectStrct srcSpect);
  spectStrct tmpSpect[2];
  int indx, indx1, indx2;
  double tmpWghs[2] = {1., 1.};
  int i;
  
  if (spectNum < 1)
    return;
  
  // single spectrum
  if(spectNum == 1) {
    copySpectStrct(cmbSpect, spects[0]);
    if(wghs[0] != 1.) {
      for(i = 0; i < cmbSpect->size; ++i)
	cmbSpect->yval[i] *= wghs[0];
    }
    return;
  } 
  
  // 2 spectra
  if(spectNum == 2) {
    tmpSpect[0].size = spects[0].size + spects[1].size;
    tmpSpect[0].xval = (double *) calloc(tmpSpect[0].size, sizeof(double));
    tmpSpect[0].yval = (double *) calloc(tmpSpect[0].size, sizeof(double));

    indx1 = 0;
    indx2 = 0;
    indx = 0;
    while(indx1 < spects[0].size || indx2 < spects[1].size) {
      
      if(indx1 >= spects[0].size){
	tmpSpect[0].xval[indx] = spects[1].xval[indx2];
	tmpSpect[0].yval[indx] = spects[1].yval[indx2]*wghs[1];      
	++indx2;
	++indx;
      }
      else if (indx2 >= spects[1].size) {
	tmpSpect[0].xval[indx] = spects[0].xval[indx1];
	tmpSpect[0].yval[indx] = spects[0].yval[indx1]*wghs[0];      
	++indx1;
	++indx;
      }
      else if(spects[0].xval[indx1] == spects[1].xval[indx2]) {
	tmpSpect[0].xval[indx] = spects[0].xval[indx1];
	tmpSpect[0].yval[indx] = spects[0].yval[indx1]*wghs[0] 
	  + spects[1].yval[indx2]*wghs[1];      
	++indx1;
	++indx2;
	++indx;
      }
      else if(spects[0].xval[indx1] < spects[1].xval[indx2]) {
	tmpSpect[0].xval[indx] = spects[0].xval[indx1];
	tmpSpect[0].yval[indx] = spects[0].yval[indx1]*wghs[0];      
	++indx1;
	++indx;
      }
      else {
	tmpSpect[0].xval[indx] = spects[1].xval[indx2];
	tmpSpect[0].yval[indx] = spects[1].yval[indx2]*wghs[1];      
	++indx2;
	++indx;
      }
    } 
    tmpSpect[0].size = indx;
    
    copySpectStrct(cmbSpect, tmpSpect[0]);
    freeSpectStrct(tmpSpect[0]);

    return;
  }

  // at least three spectra
  indx1 = spectNum/2;
  indx2 = spectNum - spectNum/2;
  getCmbSpect(&(tmpSpect[0]), indx1, spects, wghs);
  getCmbSpect(&(tmpSpect[1]), indx2, spects+indx1, wghs+indx1);
  getCmbSpect(cmbSpect, 2, tmpSpect, tmpWghs);
  
  freeSpectStrct(tmpSpect[0]);
  freeSpectStrct(tmpSpect[1]);

  return;
}

void copySpectStrct(spectStrct * tgtSpect, spectStrct srcSpect)
{
  int i;

  tgtSpect->size = srcSpect.size;
  tgtSpect->xval = (double *) calloc(tgtSpect->size, sizeof(double));
  tgtSpect->yval = (double *) calloc(tgtSpect->size, sizeof(double));

  for (i = 0; i < tgtSpect->size; ++i) {
    tgtSpect->xval[i] = srcSpect.xval[i];
    tgtSpect->yval[i] = srcSpect.yval[i];
  }

  return;
}


/* main program starts here */

int main(int argc, char *argv[]) 
{
  FILE *inp, *outp;

  typedef struct {
    char sequence[MAX_PEPTIDE_LENGTH];
    char protein[200];
    double mz;                               /* calculated exact m/z */
    double ionscore;                         /* ionscore form Mascot */
    double total_modification_mass;
    double scan;
    int z;
  } peptide_type;

  typedef struct {
    double B[MAX_BREAKPOINTS][2];
    int nB;
  } PLF_type; /* piecewise linear function */

  PLF_type best_C;
  peptide_type peptide[1000], unique_peptide[800];
  spectStrct mzXML_spectrum, temp_spectrum;
  RAMPFILE *mzXML_file;
  char pepXML_filename[100], mzXML_filename[100], output_filename[100], best_C_filename[100], fN15_filename[100], list_filename[100], background[15];
  char line[4000], full_line[4000], *p, temp[30], sequence[MAX_PEPTIDE_LENGTH];
  ramp_fileoffset_t offset, *scan_index;
  struct ScanHeaderStruct scan_header;
  struct RunHeaderStruct run_header;
  int nographics, maximum, start_scan, end_scan, pepXML_MS_scan, MS_start_scan, MS_end_scan, assumed_charge, range[2], n_peptides, n_unique_peptides, *SIC_argmax, seen_before, CHARGE;
  int bp1, bp2, mzXML_MS_scan, mzXML_argmax, isotope, n_multiplex, list_mode, z; /* isotope = 14 for N-14, 15 for N-15 peptide identifications */
  long i, j, n, k;
  double ionscore, min_ion_score, intensity_sum, distribution_sum, distribution_max_sum, *SIC_max, max_mass_measurement_error, intensity_max, PEPMASS, mzXML_mz, m, k_slope, mz;
  double distribution[WINDOW_WIDTH], distribution_max[WINDOW_WIDTH], normalized_distribution[WINDOW_WIDTH], calc_distribution[MAX_MULTIPLEX][WINDOW_WIDTH], integration_window, total_modification_mass, fN15[MAX_MULTIPLEX], background_mean, background_sd, sum;
  char start_scan_string[9], end_scan_string[9], assumed_charge_string[3], peptide_string[MAX_PEPTIDE_LENGTH], protein_string[200], modification_mass_string[20], ionscore_string[10]; 
  
  unsigned char aa[20][5]={{5,3,1,1,0}, /* amino acid compositions */
			   {12,6,4,1,0},
			   {6,4,2,2,0},
			   {5,4,1,3,0},
			   {5,3,1,1,1}, /* normal cysteine */
			   {7,5,1,3,0},
			   {8,5,2,2,0},
			   {3,2,1,1,0},
			   {7,6,3,1,0},
			   {11,6,1,1,0},
			   {11,6,1,1,0},
			   {12,6,2,1,0},
			   {9,5,1,1,1},
			   {9,9,1,1,0},
			   {7,5,1,1,0},
			   {5,3,1,2,0},
			   {7,4,1,2,0},
			   {10,11,2,1,0},
			   {9,9,1,2,0},
			   {9,5,1,1,0}};
  double mimass[5]={1.0078250321,12,14.0030740052,15.9949146221,31.97207069};
  int fragment[5]; 




  double chisq;

  gsl_matrix *X, *cov;
  gsl_vector *yy, *ww, *cc; 
  gsl_multifit_linear_workspace *work;


  
  double mass(char *sequence)
  {
    int a,b,i;
    double result;
    for(i=0;i<5;i++) fragment[i]=0;

    for(i=0;i<strlen(sequence);i++) /* generate molecular formula for fragment */
    {
      a=20-strlen(strchr(AMINO_ACIDS,sequence[i]));
      for(b=0;b<5;b++) fragment[b]=fragment[b]+aa[a][b]; 
    } 
    
    fragment[0]=fragment[0]+2; fragment[3]++;  /* add H2O */
  
  
    /* calculate integer mass based on molecular formula */ 
  
    result=0.0;
    for(b=0;b<5;b++) result+=fragment[b]*mimass[b];

  return result;
  }

  void N15_distribution(double *out_distribution, char *fragment_sequence, int charge, double fN15) 
  {
    #define MAX_ELEMENTS 5 /* taking both natural and solution deuterium into consideration */
    
    int a, b, j, k, integer_mass, seqlen, ec;
    long i, n;
    
    int fragment[5]; /* for elemental composition of fragment */
    
    double out_sum, mi_mass;
    fftw_complex *element[MAX_ELEMENTS], *FFTelement[MAX_ELEMENTS], power[MAX_ELEMENTS], prod[MAX_ELEMENTS], out[MAX_MASS];
    fftw_complex *FFTfragment, c;
    fftw_plan pp, ppb;
    
    
    void mulc(fftw_complex c1, fftw_complex c2, fftw_complex prod)
    {
      prod[0]=c1[0]*c2[0]-c1[1]*c2[1];
      prod[1]=c1[0]*c2[1]+c1[1]*c2[0];
    }
    
    void powc(fftw_complex c,int w, fftw_complex prod)
    { 
      double phi=0.0,r;
      
      if(c[0]==0 && c[1]==0) return;
      else
	if(c[0]==0 && c[1] > 0) phi=M_PI_2;
	else 
	  if(c[0]==0 && c[1] < 0) phi=-M_PI_2;
      
      if(phi==0.0) phi=atan2(c[1],c[0]);
      r=pow(sqrt(c[0]*c[0]+c[1]*c[1]),w);
      prod[0]=r*cos(w*phi); prod[1]=r*sin(w*phi);
    }
    
    mi_mass=mass(fragment_sequence)+charge*HPLUS_MASS; /* mass(...) is neutral mass, add protons */
    
    
    /* dynamic memory allocation of FFTW arrays*/ 
    
    for(i=0;i<MAX_ELEMENTS;i++) 
      {
	element[i] = (fftw_complex *) malloc(MAX_MASS * sizeof(fftw_complex));
	FFTelement[i] = (fftw_complex *) malloc(MAX_MASS * sizeof(fftw_complex));
      }
    
    FFTfragment = (fftw_complex *) malloc(MAX_MASS * sizeof(fftw_complex));
    
      
    /* isotopic distributions of the elements H, C, N, O and S */
    
    for(i=0;i<MAX_ELEMENTS;i++) for(j=0;j<MAX_MASS;j++) {element[i][j][0]=0; element[i][j][1]=0;}
    element[0][1][0]=0.9998443;  element[0][2][0]=0.0001557;
    element[1][12][0]=0.98889;   element[1][13][0]=0.01111;
    element[2][14][0]=1.0-fN15;  element[2][15][0]=fN15; /* custom for N-15 */
    element[3][16][0]=0.997628;  element[3][17][0]=0.000372;  element[3][18][0]=0.002000;
    element[4][32][0]=0.95018;   element[4][33][0]=0.00750;   element[4][34][0]=0.04215; element[4][36][0]=0.00017;
    
    
    /* calculate the FFT's of each elements isotopic distribution */
    
    for(i=0;i<MAX_ELEMENTS;i++) {
      pp=fftw_plan_dft_1d(MAX_MASS, element[i], FFTelement[i], FFTW_FORWARD, FFTW_ESTIMATE);  
      fftw_execute(pp);
    }
    
    seqlen=strlen(fragment_sequence); /* length of sequence */
      
    for(b=0;b<MAX_ELEMENTS;b++) fragment[b]=0;
    
    for(i=0;i<seqlen;i++) /* generate molecular formula for fragment */
      {
	a=20-strlen(strchr(AMINO_ACIDS,fragment_sequence[i]));
	for(b=0;b<5;b++) 
	  {
	    fragment[b]=fragment[b]+aa[a][b]; 
	  }
      }
    fragment[0]+=(2+charge); fragment[3]+=1; /* add H2O and H+ (+charge) */
    
    integer_mass=fragment[0]+12*fragment[1]+14*fragment[2]+16*fragment[3]+32*fragment[4]; /* with protons */
    
    /* calculate isotopic distribution of fragment */
    
    for(i=0;i<MAX_MASS;i++) {
      powc(FFTelement[0][i],fragment[0],power[0]);
      powc(FFTelement[1][i],fragment[1],power[1]);
      powc(FFTelement[2][i],fragment[2],power[2]);
      powc(FFTelement[3][i],fragment[3],power[3]);
      powc(FFTelement[4][i],fragment[4],power[4]);
      mulc(power[0],power[1],prod[0]);
      mulc(power[2],power[3],prod[1]);
      mulc(power[4],prod[0],prod[2]);
      mulc(prod[1],prod[2],FFTfragment[i]);
    }
    
    /* calculate fragment isotopic distribution */
    
    ppb=fftw_plan_dft_1d(MAX_MASS, FFTfragment, out, FFTW_BACKWARD, FFTW_ESTIMATE);  
    fftw_execute(ppb);
    out_sum=0;
    for(i=-3;i<WINDOW_WIDTH-3;i++) out_sum+=out[(int)floor(0.5+i*N15+integer_mass)][0]; 
    for(i=-3;i<WINDOW_WIDTH-3;i++) out[(int)floor(0.5+i*N15+integer_mass)][0]/=out_sum;

    // printf("calc. distribution (z=%d) [",charge);
    // for(i=-3;i<WINDOW_WIDTH-3;i++) printf("%1.4f ",out[(int)floor(0.5+i*N15+integer_mass)][0]);
    // printf("]\n");
  
    for(i=-3;i<WINDOW_WIDTH-3;i++) out_distribution[i+3]=out[(int)floor(0.5+i*N15+integer_mass)][0];
    
    
    /* cleanup */
    
    fftw_destroy_plan(pp);
    fftw_destroy_plan(ppb);
    
    return;
  }

  

  
  /* parsing command line parameters */
  
  if( (argc==2) && ( (strcmp(argv[1],"--help")==0) || (strcmp(argv[1],"-help")==0) || (strcmp(argv[1],"-h")==0)) ) /* want help? */
    {
      printf("muxQuant - (c) Magnus Palmblad 2007-\n\nusage: muxQuant -N<14|15> -p<MS/MS pepXML filename> -m<MS dataset in mzXML> -a<alignment function> -f<fraction N-15 filename> -e<max. mass difference between MS and MS/MS datasets (in ppm)> -I<integration window (in ppm)> [-i<ion score cutoff> -b<background mean>,<background s.d.> -R<MS start scan>,<MS end scan> -o<output QuantN15 file> -maximum -nographics] (type muxQuant --help for more information)\n\nfor additional information, see http://www.ms-utils.org/muxQuant or e-mail magnus.palmblad@gmail.com\n");
      return 0;
    }
  
  if (argc<2 || argc>28) /* check for incorrect number of parameters */
    {
      printf("usage: muxQuant -N<14|15> -p<MS/MS pepXML filename> -m<MS dataset in mzXML> -a<alignment function> -f<fraction N-15 filename> -e<max. mass difference between MS and MS/MS datasets (in ppm)> -I<integration window (in ppm)> [-i<ion score cutoff> -b<background mean>,<background s.d.> -R<MS start scan>,<MS end scan> -o<output QuantN15 file> -maximum -nographics] (type muxQuant --help for more information)\n");
      return -1;
    }

  /* add -C<12|13> flag for C-13 isotopic labeling */

  nographics=0; maximum=0; min_ion_score=30; list_mode=0; /* default values */
  for(i=1;i<argc;i++) {
    if( (argv[i][0]=='-') && (argv[i][1]=='L') ) {strcpy(list_filename,&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]); list_mode=1;}
    if( (argv[i][0]=='-') && (argv[i][1]=='p') ) strcpy(pepXML_filename,&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]);
    if( (argv[i][0]=='-') && (argv[i][1]=='a') ) strcpy(best_C_filename,&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]);
    if( (argv[i][0]=='-') && (argv[i][1]=='f') ) strcpy(fN15_filename,&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]);
    if( (argv[i][0]=='-') && (argv[i][1]=='m') ) 
      {
	if( strcmp(argv[i],"-maximum")==0 ) maximum=1; /* to use maximum in integration window rather than integrating the signal in the whole window */
	else
	  {
	    strcpy(mzXML_filename,&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]); 
	    strcpy(output_filename,mzXML_filename); strcat(output_filename,".quantitation"); /* default output filename */
	  }
      }
    if( (argv[i][0]=='-') && (argv[i][1]=='b') ) 
      {
	strcpy(background,&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]); p=strtok(background,","); 
	background_mean=atof(p); p=strtok('\0',","); background_sd=atof(p);
      }
    if( (argv[i][0]=='-') && (argv[i][1]=='e') ) max_mass_measurement_error=atof(&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]);
    if( (argv[i][0]=='-') && (argv[i][1]=='I') ) integration_window=atof(&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]);
    if( (argv[i][0]=='-') && (argv[i][1]=='N') ) isotope=atoi(&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]);
    if( (argv[i][0]=='-') && (argv[i][1]=='i') ) min_ion_score=atof(&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]);
    if( (argv[i][0]=='-') && (argv[i][1]=='R') ) 
      {
	strcpy(temp,&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]); p=strtok(temp,","); 
	MS_start_scan=atof(p); p=strtok('\0',","); MS_end_scan=atof(p);
      }
    if( (argv[i][0]=='-') && (argv[i][1]=='o') ) strcpy(output_filename,&argv[strlen(argv[i])>2?i:i+1][strlen(argv[i])>2?2:0]);
    if( strcmp(argv[i],"-nographics")==0 ) nographics=1; /* to turn off graphics output (not used) */
  }
  

  if(!list_mode)
    {
      
      /* read pepXML file */

      if((inp = fopen(pepXML_filename, "r"))==NULL) {printf("error opening pepXML file %s",pepXML_filename); return -1;}
      
      printf("processing pepXML file %s...",pepXML_filename);fflush(stdout);  
      
      while(fgets(line,4000,inp)!=NULL) if(strstr(line,"</search_summary>")!=NULL) break; /* skip <search_summary> */
      
      i=0; n_peptides=0; total_modification_mass=0.0;
      while(fgets(line,4000,inp)!=NULL)
	{
	  if(strcmp(line,"\n")==0) continue; /* read past any blank lines */
	  p=strtok(line," =");
	  do {
	    if(strstr(p,"start_scan")!=NULL) {p=strtok('\0'," ="); strncpy(start_scan_string,p+1,strcspn(p+1,"\"")); start_scan=atoi(start_scan_string);}
	    if(strstr(p,"end_scan")!=NULL) {p=strtok('\0'," ="); strncpy(end_scan_string,p+1,strcspn(p+1,"\"")); end_scan=atoi(end_scan_string);}
	    if(strstr(p,"assumed_charge")!=NULL) {p=strtok('\0'," ="); strncpy(assumed_charge_string,p+1,strcspn(p+1,"\"")); assumed_charge=atoi(assumed_charge_string);}
	    if((strstr(p,"peptide")!=NULL)&&(strstr(p,"peptide_")==NULL)) {p=strtok('\0'," ="); strncpy(peptide_string,p+1,strcspn(p+1,"\"")); peptide_string[strcspn(p+1,"\"")]='\0';}
	    if((strstr(p,"protein")!=NULL)&&(strstr(p,"_protein")==NULL)) {p=strtok('\0'," ="); strncpy(protein_string,p+1,strcspn(p+1,"\"")); protein_string[strcspn(p+1,"\"")]='\0';}
	    if(strstr(p,"ionscore")!=NULL) {p=strtok('\0'," ="); p=strtok('\0'," ="); strncpy(ionscore_string,p+1,strcspn(p+1,"\"")); ionscore=atof(ionscore_string);}
	    if(strstr(p,"mod_aminoacid_mass")!=NULL) {p=strtok('\0'," ="); while(strstr(p,"mass")==NULL) p=strtok('\0'," ="); p=strtok('\0'," ="); strcspn(p+1,"\""); strncpy(modification_mass_string,p+1,strcspn(p+1,"\"")); total_modification_mass+=atof(modification_mass_string);} 
	    if(strstr(p,"/search_result")!=NULL) /* reached end of search hits, store last peptide (assume only top ranking in pepXML file) */
	      {
		if((ionscore>min_ion_score)&&(total_modification_mass==0)) /* could be extended to deal with modified peptides */
		  {
		    strcpy(peptide[i].sequence,peptide_string);
		    strcpy(peptide[i].protein,protein_string);
		    peptide[i].scan=(start_scan+end_scan)/2;
		    peptide[i].mz=(mass(peptide_string)+total_modification_mass+assumed_charge*HPLUS_MASS)/assumed_charge;
		    peptide[i].z=assumed_charge;
		    peptide[i].ionscore=ionscore;
		    i++;
		  }
		total_modification_mass=0.0; 
	      }
	    p=strtok('\0'," =");
	  }	while (p!=NULL);
	}
      n_peptides=i;
      close(inp);  
      printf("done (found %i peptide IDs in pepXML file)\nremoving redundant peptides...",n_peptides); fflush(stdout);
      
      strcpy(unique_peptide[0].sequence,peptide[0].sequence);
      strcpy(unique_peptide[0].protein,peptide[0].protein);
      unique_peptide[0].mz=peptide[0].mz;
      unique_peptide[0].ionscore=peptide[0].ionscore;
      unique_peptide[0].scan=peptide[0].scan;
      unique_peptide[0].z=peptide[0].z;
      
      n_unique_peptides=1;
      
      for(k=1;k<n_peptides;k++)
	{
	  seen_before=0;
	  for(j=0;j<n_unique_peptides;j++)
	    { 
	      if(strcmp(peptide[k].sequence,unique_peptide[j].sequence)==0) /* have we seen this peptide before? */
		{
		  seen_before=1;
		  if(peptide[k].ionscore>unique_peptide[j].ionscore) /* is the ion score higher than previous best for peptide? */
		    {
		      unique_peptide[j].ionscore=peptide[k].ionscore;
		      unique_peptide[j].scan=peptide[k].scan;
		    }
		}
	    }
	  
	  if(!seen_before) /* new peptide? */
	    {
	      strcpy(unique_peptide[n_unique_peptides].sequence,peptide[k].sequence);
	      strcpy(unique_peptide[n_unique_peptides].protein,peptide[k].protein);
	      unique_peptide[n_unique_peptides].mz=peptide[k].mz;
	      unique_peptide[n_unique_peptides].ionscore=peptide[k].ionscore;
	      unique_peptide[n_unique_peptides].scan=peptide[k].scan;
	      unique_peptide[n_unique_peptides].z=peptide[k].z;
	      n_unique_peptides++;
	    }
	}
      
      printf("done (found %i unique peptides)\n",n_unique_peptides);
      
      
      /* read alignment (best_C) */
      
      if((inp = fopen(best_C_filename, "r"))==NULL) {printf("error opening alignment file %s",best_C_filename); return -1;}
      
      printf("reading alignment file...");fflush(stdout);  
      
      for(i=0;i<MAX_BREAKPOINTS;i++) 
	{
	  best_C.B[i][1]=-1;
	  best_C.B[i][2]=-1;
	}
      i=0;
      while(fgets(line,100,inp)!=NULL)
	{
	  if(strcmp(line,"\n")==0) continue; /* read past any blank lines */
	  if(i>=MAX_BREAKPOINTS) break;
	  p=strtok(line," ");
	  best_C.B[i][1]=atof(p);
	  p=strtok('\0'," ");
	  best_C.B[i][2]=atof(p);
	  i++;
	}
      
      printf("done\nalignment =\n[0.0 0.0;");
      for(i=0;i<MAX_BREAKPOINTS;i++)
	{
	  if(best_C.B[i][1]>0) printf("\n%1.1f %1.1f;",best_C.B[i][1],best_C.B[i][2]);
	}
      printf("]\n"); fflush(stdout);
      
      
      /* read fractions N-15 (fN15[]) */
      
      if((inp = fopen(fN15_filename, "r"))==NULL) {printf("error opening fraction N-15 file %s",fN15_filename); return -1;}
      
      printf("reading fraction N-15 file...");fflush(stdout);  
      
      i=0;
      while(fgets(line,100,inp)!=NULL)
	{
	  if(strcmp(line,"\n")==0) continue; /* read past any blank lines */
	  if(i>=MAX_MULTIPLEX) break;
	  p=strtok(line," ");
	  fN15[i]=atof(p);
	  i++;
	}
      n_multiplex=i;
      printf("done (%d N-15 fractions = [%1.4f",n_multiplex,fN15[0]);
      for(i=1;i<n_multiplex;i++) printf(" %1.4f",fN15[i]);
      printf("])\n"); fflush(stdout);
      
      
      /* initialize mzXML dataset */
      
      printf("checking mzXML dataset %s...",mzXML_filename); fflush(stdout);
      mzXML_file=rampOpenFile(mzXML_filename);
      indexOffset = getIndexOffset (mzXML_file); /* read the index offset */
      pScanIndex = readIndex(mzXML_file, indexOffset, &iLastScan); /* read the scan index into a vector and get LastScan */
      readRunHeader(mzXML_file, pScanIndex, &runHeader, iLastScan);
      printf("done\n"); fflush(stdout);
      
      
      printf("extracting quantitative information from mzXML file...");fflush(stdout);
      
      if ((outp=fopen(output_filename, "w"))==NULL) {printf("\nerror opening output file %s\n",output_filename); return -1;}
      
      /* loop over all unique peptides and extract quantitative information for N-14/N-15 variants */
      
      for(k=0;k<n_unique_peptides;k++) 
	{
	  
	  /* find matching spectrum in MS data */
	  j=0; pepXML_MS_scan=unique_peptide[k].scan;
	  while(best_C.B[j][1]<0) j=j+1;
	  bp1=j; bp2=j+1;
	  for(j=0;j<MAX_BREAKPOINTS-1;j++)
	    {
	      if( (unique_peptide[k].scan>best_C.B[j][1])&&(unique_peptide[k].scan<best_C.B[j+1][1]) )
		{ 
		  bp1=j;
		  bp2=j+1; 
		}
	    }
	  
	  mzXML_MS_scan=1; // ?!
	  
	  /* define y=k_slope*x+m for line segment between breakpoints */
	  if((best_C.B[bp2][1]-best_C.B[bp1][1])>0)
	    {
	      k_slope=(best_C.B[bp2][2]-best_C.B[bp1][2])/(best_C.B[bp2][1]-best_C.B[bp1][1]);
	      m=best_C.B[bp1][2]-k_slope*best_C.B[bp1][1];
	      mzXML_MS_scan=k_slope*pepXML_MS_scan+m; /* pepXML_MS_scan is MS (or sometimes MS/MS scan) from MS/MS pepXML file! */
	    }
	  
	  mzXML_mz=unique_peptide[k].mz ; intensity_max=1.0; 
	  mzXML_argmax=-1;
	  range[0]=mzXML_MS_scan-round(MS_SCAN_WINDOW/2); /* optimize this? */
	  
	  while (range[0]<mzXML_MS_scan+round(MS_SCAN_WINDOW/2))
	    {
	      if( (range[0]>MS_start_scan) && (range[0]<MS_end_scan) )
		{ 
		  readHeader(mzXML_file, pScanIndex[range[0]], &scanHeader);
		  if(scanHeader.msLevel==1) /* MS spectrum */
		    { 
		      range[1]=range[0]; /* get spectra one at a time - could be improved? */
		      if(getMsSpect(&mzXML_spectrum,mzXML_file,range)<0) break; /* MS spectrum not found in mzXML */
		      for(i=0;i<mzXML_spectrum.size;i++) /* # of peaks in peaklist in line spectra */
			{
			  if(((fabs(mzXML_spectrum.xval[i]-mzXML_mz))/mzXML_mz*1e6)<max_mass_measurement_error) 
			    {
			      if(intensity_max<mzXML_spectrum.yval[i])
				{
				  intensity_max=mzXML_spectrum.yval[i]; mzXML_argmax=range[0];
				  // printf("pepXML_MS_scan=%d, mzXML_argmax=%d, intensity_max=%f, unique_peptide[%d]=%s, z=%d\n",pepXML_MS_scan,mzXML_argmax,intensity_max,k,unique_peptide[k].sequence,unique_peptide[k].z);
				}
			    }
			}
		    }
		}
	      range[0]++;
	    }
	  
	  
	  /* extract whole multiplex distribution */
	  
	  
	  if(mzXML_argmax>0)
	    {
	           
	      /* peptide ID from N-14? */
	      
	      if(isotope==14) /* distribution[3] holds the monoisotopic peak, distribution[] also holds three possible peaks before for QC */
		{
		  for(j=-3;j<WINDOW_WIDTH-3;j++) {distribution[j+3]=0; distribution_max[j+3]=0;}
		  distribution_sum=0; distribution_max_sum=0; 
		  for(range[0]=mzXML_argmax-8;range[0]<=mzXML_argmax+12;range[0]+=2) /* average spectra over [-4,+6] relative to mzXML_argmax = [-2,+3] MS spectra */
		    {
		      range[1]=range[0]; /* fetch spectrum with chromatographic peak maximum */
		      readHeader(mzXML_file, pScanIndex[range[0]], &scanHeader);
		      getMsSpect(&mzXML_spectrum,mzXML_file,range); /* must be MS spectrum */
		      
		      for(j=-3;j<WINDOW_WIDTH-3;j++)
			{
			  intensity_sum=0; intensity_max=0;
			  for(i=0;i<mzXML_spectrum.size;i++) /* # of peaks in peaklist in line spectra - for raw data, start closer? */
			    {
			      if(((fabs(mzXML_spectrum.xval[i]-mzXML_mz-j*N15/unique_peptide[k].z))/mzXML_mz*1e6)<integration_window) /* near calculated isotopic peak j? */
				{
				  if(maximum) if((mzXML_spectrum.yval[i]-background_mean)>intensity_max) intensity_max=(mzXML_spectrum.yval[i]-background_mean);
				  intensity_sum+=(mzXML_spectrum.yval[i]-background_mean); /* extract intensities and subtract background... */
				}
			    }
			  
			  if(maximum) 
			    {
			      distribution_max[j+3]+=intensity_max; /* distribution_max contains peak maxima of all isotopic distributions for a peptide*/
			      distribution_max_sum+=intensity_max;
			    }
			  else
			    {
			      distribution[j+3]+=intensity_sum; /* distribution contains entire envelope of all isotopic forms */
			      distribution_sum+=intensity_sum;
			    }
			}
		    }
		}
	      
	      /* peptide ID from N-15? - copy the above and modify for N-15 identifications */
	      
	      if(isotope==15) /* distribution[3] holds the monoisotopic peak, distribution[] also holds three possible peaks before for QC */
		{
		  
		  /* modify the above to integrate over chromatographic peak or +/- a fixed number of scans from the maximum */
		}
	      	      
	      
	      /* common N-14/N-15 peptide ID path continues here... */
	    
	      if(maximum) for(j=-3;j<WINDOW_WIDTH-3;j++) normalized_distribution[j]=distribution_max[j]/distribution_max_sum; /* normalize measure dist. (using maximum in integration window) */
	      else for(j=-3;j<WINDOW_WIDTH-3;j++) normalized_distribution[j]=distribution[j]/distribution_sum; /* normalize measure dist. (integrating whole window) */
	    	      
	      for(i=0;i<n_multiplex;i++) N15_distribution(calc_distribution[i],unique_peptide[k].sequence,unique_peptide[k].z,fN15[i]);
	      	      
	      X=gsl_matrix_alloc(WINDOW_WIDTH,n_multiplex); 
	      yy=gsl_vector_alloc(WINDOW_WIDTH);
	      ww=gsl_vector_alloc(WINDOW_WIDTH);
	      cc=gsl_vector_alloc(n_multiplex);
	      cov=gsl_matrix_alloc(n_multiplex,n_multiplex);
	      
	      for(i=0;i<n_multiplex;i++) gsl_vector_set(cc,i,1);
	      printf("%s %s",unique_peptide[k].protein,unique_peptide[k].sequence);
	      for(n=0;n<WINDOW_WIDTH;n++)
		{
		  gsl_vector_set(yy,n,normalized_distribution[n]);
		  gsl_vector_set(ww,n,1);
		  for(i=0;i<n_multiplex;i++) gsl_matrix_set(X,n,i,calc_distribution[i][n]);
		  printf("%1.4f ",normalized_distribution[n]);
		}
	      printf("\n");
	      work=gsl_multifit_linear_alloc(WINDOW_WIDTH,n_multiplex);
	      gsl_multifit_wlinear(X,ww,yy,cc,cov,&chisq,work);
	      gsl_multifit_linear_free(work);
	   
	      
	      /* print peptide results to output file */
	      
	      sum=0.0;
	      fprintf(outp,"%s %s %f %d %d %d",unique_peptide[k].protein,unique_peptide[k].sequence,mzXML_mz,unique_peptide[k].z,pepXML_MS_scan,mzXML_argmax);
	      for(i=0;i<n_multiplex;i++) {fprintf(outp," %1.4f",gsl_vector_get(cc,(i))); sum+=gsl_vector_get(cc,(i));}
	      fprintf(outp," | %1.4f\n",sum); fflush(outp);
	      
	    }
	  
	}
      printf("done\n"); fflush(stdout);
      fclose(outp);
      rampCloseFile(mzXML_file);
      return 0;
    }
  else /* peptide list mode - for testing the quantitation only using a list of peptides and isotopic envelopes */
    {
       /* read fractions N-15 (fN15[]) */
      
      if((inp = fopen(fN15_filename, "r"))==NULL) {printf("error opening fraction N-15 file %s",fN15_filename); return -1;}
      
      printf("reading fraction N-15 file...");fflush(stdout);  
      
      i=0;
      while(fgets(line,100,inp)!=NULL)
	{
	  if(strcmp(line,"\n")==0) continue; /* read past any blank lines */
	  if(i>=MAX_MULTIPLEX) break;
	  p=strtok(line," ");
	  fN15[i]=atof(p);
	  i++;
	}
      n_multiplex=i;
      printf("done (%d N-15 fractions = [%1.3f",n_multiplex,fN15[0]);
      for(i=1;i<n_multiplex;i++) printf(" %1.3f",fN15[i]);
      printf("])\n"); fflush(stdout);
      
      /* read peptide list with normalized distributions (for debugging or if preprocessing data outside muxQuant) */
      
      if((inp = fopen(list_filename, "r"))==NULL) {printf("error opening peptide list file %s",list_filename); return -1;}
      
      printf("reading/processing peptide list...");fflush(stdout);  
      
      while(fgets(line,2000,inp)!=NULL)
	{
	  if(strcmp(line,"\n")==0) continue; /* read past any blank lines */
	  p=strtok(line," ");
	  strcpy(sequence,p);
	  p=strtok('\0'," ");
	  z=atoi(p);
	  for(i=0;i<n_multiplex;i++) N15_distribution(calc_distribution[i],sequence,z,fN15[i]);
	  for(i=0;i<n_multiplex;i++) { for(n=0;n<WINDOW_WIDTH;n++) printf("%1.4f ",calc_distribution[i][n]); printf("\n");}
	  p=strtok('\0'," ");
	  for(n=0;n<WINDOW_WIDTH;n++)
	    {
	      normalized_distribution[n]=atof(p);
	      p=strtok('\0'," ");
	    }
	  X=gsl_matrix_alloc(WINDOW_WIDTH,n_multiplex); 
	  yy=gsl_vector_alloc(WINDOW_WIDTH);
	  ww=gsl_vector_alloc(WINDOW_WIDTH);
	  cc=gsl_vector_alloc(n_multiplex);
	  cov=gsl_matrix_alloc(n_multiplex,n_multiplex);
	  
	  for(i=0;i<n_multiplex;i++) gsl_vector_set(cc,i,1);
	  printf("\n%s ",sequence);
	  for(n=0;n<WINDOW_WIDTH;n++)
	    {
	      gsl_vector_set(yy,n,normalized_distribution[n]);
	      gsl_vector_set(ww,n,1); // initial value important???
	      for(i=0;i<n_multiplex;i++) gsl_matrix_set(X,n,i,calc_distribution[i][n]);
	      printf("%1.4f ",normalized_distribution[n]);
	    }
	  printf("\n");
	  work=gsl_multifit_linear_alloc(WINDOW_WIDTH,n_multiplex);
	  gsl_multifit_wlinear(X,ww,yy,cc,cov,&chisq,work);
	  gsl_multifit_linear_free(work);
	  
	  /* print peptide results to output file */
	  
	  sum=0.0;
	  printf("%s %d ",sequence,z);
	  for(i=0;i<n_multiplex;i++) {printf(" %1.4f",gsl_vector_get(cc,(i))); sum+=gsl_vector_get(cc,(i));}
	  printf(" | %1.4f\n",sum); fflush(stdout);

	}

      return 0;
    }
}
