/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package jmotifx.preprocess;
import database.AminoAcid;
import database.Database;
import database.Protein;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import java.util.regex.Pattern;
import jmotifx.sequenceobjects.AroundSiteFPeptideObject;
/**
 * Builds {@link AroundSiteFPeptideObject} instances from three kinds of input:
 * raw peptide strings, fixed-width windows cut out of database protein
 * sequences around a residue of interest, and randomly generated 13-mers
 * with an N at the center and S/T at position +2.
 *
 * @author paiyeta1
 */
public class JMotifXAroundSitePeptideObjectsMaker {

    /** Matches every character that is not an upper-case ASCII letter. */
    private static final Pattern NON_UPPERCASE = Pattern.compile("[^A-Z]");

    /** Number of residues taken on each side of a glycosite (13-mer windows). */
    private static final int GLYCO_FLANK = 6;

    /** Total length of a glycosite window: both flanks plus the center residue. */
    private static final int GLYCO_WINDOW = (GLYCO_FLANK * 2) + 1;

    /** Residues allowed at relative position +2 of a random Nx[ST] peptide. */
    private static final char[] SER_THR = {'S', 'T'};

    /**
     * Wraps each peptide sequence in an {@link AroundSiteFPeptideObject}.
     * Every character outside {@code A-Z} is stripped from a sequence before
     * it is handed to the constructor together with the configured center
     * residue.
     *
     * @param peptideSequences peptide strings to convert
     * @param configMap        configuration; the value under key
     *                         {@code "centerResidue"} is read and passed through
     *                         unchanged (may be {@code null} if the key is absent)
     * @return one object per input sequence, in input order
     */
    public ArrayList<AroundSiteFPeptideObject> createSequencesAroundSiteFPeptideObjects(ArrayList<String> peptideSequences,
            HashMap<String,String> configMap){
        System.out.println(" Creating sequences' around-site-formatted-peptide objects....");
        String centerResidue = configMap.get("centerResidue");
        ArrayList<AroundSiteFPeptideObject> peptideObjects = new ArrayList<AroundSiteFPeptideObject>();
        for (String peptideSequence : peptideSequences) {
            // Precompiled pattern; equivalent to peptideSequence.replaceAll("[^A-Z]", "").
            String lettersOnly = NON_UPPERCASE.matcher(peptideSequence).replaceAll("");
            peptideObjects.add(new AroundSiteFPeptideObject(lettersOnly, centerResidue));
        }
        return peptideObjects;
    }

    /**
     * Cuts a window around every occurrence of the configured center residue
     * in every database protein and wraps each window in an
     * {@link AroundSiteFPeptideObject}.
     * <p>
     * The flank length is {@code peptideWindow / 2} (integer division), so
     * the window length is always odd: {@code 2 * (peptideWindow / 2) + 1}.
     * Sites too close to either end of the sequence to yield a full window
     * are skipped.
     *
     * @param dbase     source of the proteins to scan
     * @param configMap configuration; reads {@code "centerResidue"} and
     *                  {@code "peptideWindow"} (the latter must parse as an int)
     * @return all windows found, in protein order then site order
     * @throws NumberFormatException if {@code "peptideWindow"} is missing or
     *                               not a valid integer
     */
    public ArrayList<AroundSiteFPeptideObject> createDBAroundSiteFPeptideObjects(Database dbase, HashMap<String,String> configMap) {
        System.out.println(" Creating database around-site-formatted-peptide objects....");
        String centerResidue = configMap.get("centerResidue");
        int peptideWindow = Integer.parseInt(configMap.get("peptideWindow"));
        int flank = peptideWindow / 2;
        ArrayList<AroundSiteFPeptideObject> windows = new ArrayList<AroundSiteFPeptideObject>();
        for (Protein protein : dbase.getProteins()) {
            // Read the sequence once per protein instead of once per residue.
            String sequence = protein.getSequence();
            for (int siteIndex : protein.getResidueIndeces(centerResidue)) {
                if (hasFullFlanks(sequence, siteIndex, flank)) {
                    windows.add(buildWindow(sequence, siteIndex, flank));
                }
            }
        }
        return windows;
    }

    /**
     * Cuts a 13-residue window around every glycosite of every database
     * protein and returns one {@link AroundSiteFPeptideObject} per distinct
     * window sequence. When the same sequence is produced more than once,
     * only the first occurrence is kept.
     *
     * @param dbase source of the proteins to scan
     * @return the distinct windows, in order of first appearance
     */
    public ArrayList<AroundSiteFPeptideObject> createNonRedundantDBFGlycPepObjs(Database dbase) {
        System.out.println(" Creating database formatted,non-redundant glycosites objects...");
        // Sequences already emitted; a hash set keeps the duplicate check O(1)
        // per window instead of a linear scan over everything seen so far.
        Set<String> seenSequences = new HashSet<String>();
        ArrayList<AroundSiteFPeptideObject> windows = new ArrayList<AroundSiteFPeptideObject>();
        for (Protein protein : dbase.getProteins()) {
            String sequence = protein.getSequence();
            for (int glycositeIndex : protein.getGlycositeIndeces()) {
                if (hasFullFlanks(sequence, glycositeIndex, GLYCO_FLANK)) {
                    AroundSiteFPeptideObject window = buildWindow(sequence, glycositeIndex, GLYCO_FLANK);
                    // Set.add returns false when the sequence was already present.
                    if (seenSequences.add(window.getSequence())) {
                        windows.add(window);
                    }
                }
            }
        }
        return windows;
    }

    /**
     * Generates {@code size} random 13-mers with {@code 'N'} at the center
     * (relative position 0, flagged as the center residue), {@code 'S'} or
     * {@code 'T'} at relative position +2, and at every other position the
     * symbol of an entry drawn uniformly at random from
     * {@code db.getAminoAcids()}.
     *
     * @param size       number of peptides to generate; values &lt;= 0 yield
     *                   an empty list
     * @param blockwidth currently ignored: peptides are always 13 residues
     *                   long. Kept so existing callers continue to compile.
     * @param db         supplies the amino-acid array that random positions
     *                   are drawn from
     * @return the generated peptides
     * @throws IllegalArgumentException if {@code size > 0} and
     *                                  {@code db.getAminoAcids()} is empty
     *                                  (raised by {@link Random#nextInt(int)})
     */
    public ArrayList<AroundSiteFPeptideObject> createRandomNXSTContainingFGlycPepObjs(int size, int blockwidth, Database db) {
        System.out.println(" Creating random Nx[ST] containing glycosite objects...");
        ArrayList<AroundSiteFPeptideObject> randomPeptides = new ArrayList<AroundSiteFPeptideObject>();
        AminoAcid[] alphabet = db.getAminoAcids();
        // One generator for the whole call rather than a new one per peptide.
        Random random = new Random();
        for (int n = 0; n < size; n++) {
            AminoAcid[] window = new AminoAcid[GLYCO_WINDOW];
            for (int offset = -GLYCO_FLANK, slot = 0; slot < window.length; offset++, slot++) {
                char symbol;
                if (offset == 0) {
                    symbol = 'N';
                } else if (offset == 2) {
                    symbol = SER_THR[random.nextInt(SER_THR.length)];
                } else {
                    // Positions -6..-1, +1 and +3..+6 are unconstrained.
                    symbol = alphabet[random.nextInt(alphabet.length)].getSymbol();
                }
                window[slot] = new AminoAcid(symbol, offset == 0, offset);
            }
            randomPeptides.add(new AroundSiteFPeptideObject(window));
        }
        return randomPeptides;
    }

    /**
     * Tells whether a window of {@code flank} residues on each side of
     * {@code siteIndex} may be cut from {@code sequence}.
     * <p>
     * NOTE(review): the left-hand test is a strict {@code >}. If site indices
     * are 0-based (as the {@code charAt(siteIndex + offset)} access in
     * {@link #buildWindow} suggests), this demands {@code flank + 1} residues
     * before the site while only {@code flank} are demanded after it, so a
     * site at index {@code flank} is skipped. Behavior is kept as found;
     * confirm the indexing convention of the database classes before changing it.
     */
    private static boolean hasFullFlanks(String sequence, int siteIndex, int flank) {
        return (siteIndex > flank) && ((sequence.length() - siteIndex) > flank);
    }

    /**
     * Copies the {@code 2 * flank + 1} characters centered on
     * {@code siteIndex} into {@link AminoAcid}s, each tagged with its
     * position relative to the center (negative before, positive after) and
     * with the center flag set only at relative position 0.
     */
    private static AroundSiteFPeptideObject buildWindow(String sequence, int siteIndex, int flank) {
        AminoAcid[] window = new AminoAcid[(flank * 2) + 1];
        for (int offset = -flank, slot = 0; slot < window.length; offset++, slot++) {
            char symbol = sequence.charAt(siteIndex + offset);
            window[slot] = new AminoAcid(symbol, offset == 0, offset);
        }
        return new AroundSiteFPeptideObject(window);
    }
}