// Build the experiment and sample objects from the identifiers given on the command line
// and keep the replicate number this run refers to
experiment = new MsExperiment(cliInputBean.getiExperimentID());
sample = new Sample(cliInputBean.getiSampleID());
replicateNumber = cliInputBean.getReplicate();
// Create the analysis set of this PeptideShaker process:
// a ProteomicAnalysis for the replicate, attached to the experiment for this sample
SampleAnalysisSet analysisSet = new SampleAnalysisSet(sample, new ProteomicAnalysis(replicateNumber));
experiment.addAnalysisSet(sample, analysisSet);
// Set the project details: the creation date is the current time, the version comes from
// Properties.getVersion()
// NOTE(review): Properties is presumably a project class and not java.util.Properties
// (which has no getVersion()) - confirm against the imports
projectDetails = new ProjectDetails();
projectDetails.setCreationDate(new Date());
projectDetails.setPeptideShakerVersion(new Properties().getVersion());
// Get the search parameters
searchParameters = cliInputBean.getIdentificationParameters();
// A non-null return value is treated as an error message: it is printed to standard output
// and the processing carries on regardless
String error = PeptideShaker.loadModifications(searchParameters);
if (error != null) {
System.out.println(error);
}
// Get the default sequence matching preferences, derived from the search parameters
sequenceMatchingPreferences = SequenceMatchingPreferences.getDefaultSequenceMatching(searchParameters);
// Get the input files
ArrayList<File> identificationFilesInput = cliInputBean.getIdFiles();
ArrayList<File> dataFolders = new ArrayList<File>();
ArrayList<File> spectrumFiles = cliInputBean.getSpectrumFiles();
// Sub folders probed next to every identification file and inside every unzipped archive.
// The two orders differ on purpose: they keep the order in which folders were always appended
// to dataFolders, which is the order in which these folders are scanned afterwards.
String[] siblingFolderNames = {"mgf", "fasta", PeptideShaker.DATA_DIRECTORY};
String[] unzippedFolderNames = {PeptideShaker.DATA_DIRECTORY, "mgf", "fasta"};
// export data from zip files, try to find the mgf files
ArrayList<File> identificationFiles = new ArrayList<File>();
for (File inputFile : identificationFilesInput) {
    File parentFile = inputFile.getParentFile();
    if (parentFile == null) {
        // A bare relative file name ("results.dat") has no parent path. Resolve it against the
        // working directory so that no null entry ends up in dataFolders.
        parentFile = inputFile.getAbsoluteFile().getParentFile();
    }
    if (parentFile != null && !dataFolders.contains(parentFile)) {
        dataFolders.add(parentFile);
    }
    // register the well-known data folders located next to the identification file
    for (String folderName : siblingFolderNames) {
        File dataFolder = new File(parentFile, folderName);
        if (dataFolder.exists() && !dataFolders.contains(dataFolder)) {
            dataFolders.add(dataFolder);
        }
    }
    String fileName = inputFile.getName();
    if (!fileName.toLowerCase().endsWith("zip")) {
        // plain identification file, used as provided
        identificationFiles.add(inputFile);
        continue;
    }
    // zipped input: extract into a registered temporary folder next to the archive
    waitingHandler.appendReport("Unzipping " + fileName + ".", true, true);
    File destinationFolder = new File(parentFile, FileImporter.getTempFolderName(fileName));
    destinationFolder.mkdir();
    TempFilesManager.registerTempFolder(destinationFolder);
    ZipUtils.unzip(inputFile, destinationFolder, waitingHandler);
    // register the well-known data folders found inside the archive
    for (String folderName : unzippedFolderNames) {
        File dataFolder = new File(destinationFolder, folderName);
        if (dataFolder.exists() && !dataFolders.contains(dataFolder)) {
            dataFolders.add(dataFolder);
        }
    }
    // File.listFiles() returns null when the folder does not exist or cannot be read
    // (e.g. the mkdir() above failed); report it instead of failing on a NullPointerException
    File[] unzippedFiles = destinationFolder.listFiles();
    if (unzippedFiles == null) {
        waitingHandler.appendReport("Unable to list the content of " + destinationFolder.getAbsolutePath() + ".", true, true);
        continue;
    }
    for (File zippedFile : unzippedFiles) {
        String nameLowerCase = zippedFile.getName().toLowerCase();
        boolean identificationExtension = nameLowerCase.endsWith("dat")
                || nameLowerCase.endsWith("omx")
                || nameLowerCase.endsWith("xml")
                || nameLowerCase.endsWith("mzid")
                || nameLowerCase.endsWith("csv")
                || nameLowerCase.endsWith("tags");
        // xml files holding modifications or settings are not identification results
        boolean parameterFile = nameLowerCase.endsWith("mods.xml")
                || nameLowerCase.endsWith("usermods.xml")
                || nameLowerCase.endsWith("settings.xml");
        if (identificationExtension && !parameterFile) {
            identificationFiles.add(zippedFile);
        }
    }
}
// List the spectrum files found: start from the files given explicitly, then add every
// .mgf file of the data folders whose name is not listed yet (the first file seen for a name wins)
ArrayList<String> names = new ArrayList<String>();
for (File spectrumFile : spectrumFiles) {
    names.add(spectrumFile.getName());
}
for (File dataFolder : dataFolders) {
    // dataFolders can hold a null entry (identification file given without a parent path), and
    // File.listFiles() returns null when the path is not a readable directory: skip both
    // instead of failing on a NullPointerException
    File[] folderContent = dataFolder == null ? null : dataFolder.listFiles();
    if (folderContent == null) {
        continue;
    }
    for (File file : folderContent) {
        String name = file.getName();
        if (name.endsWith(".mgf") && !names.contains(name)) {
            spectrumFiles.add(file);
            names.add(name);
        }
    }
}
// try to locate the fasta file
File fastaFile = searchParameters.getFastaFile();
if (!fastaFile.exists()) {
    boolean relocated = false;
    // first attempt: a file of the same name in the database folder of the user preferences;
    // any failure here is only printed, the search then goes on in the data folders
    try {
        UtilitiesUserPreferences userPreferences = UtilitiesUserPreferences.loadUserPreferences();
        File dbCandidate = new File(userPreferences.getDbFolder(), fastaFile.getName());
        if (dbCandidate.exists()) {
            fastaFile = dbCandidate;
            searchParameters.setFastaFile(fastaFile);
            relocated = true;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    // second attempt: a file of the same name in one of the data folders, first match wins
    Iterator<File> folderIterator = dataFolders.iterator();
    while (!relocated && folderIterator.hasNext()) {
        File folderCandidate = new File(folderIterator.next(), fastaFile.getName());
        if (folderCandidate.exists()) {
            fastaFile = folderCandidate;
            searchParameters.setFastaFile(fastaFile);
            relocated = true;
        }
    }
    // nothing found anywhere: tell the user, the search parameters are left untouched
    if (!relocated) {
        waitingHandler.appendReport("FASTA file \'" + fastaFile.getName() + "\' not found.", true, true);
    }
}
// set the filtering import settings: every value of the new filter is copied from the
// command line input (peptide length bounds, maximal m/z deviation with its ppm flag,
// and whether unknown PTMs are removed)
idFilter = new IdFilter();
idFilter.setMinPepLength(cliInputBean.getMinPepLength());
idFilter.setMaxPepLength(cliInputBean.getMaxPepLength());
idFilter.setMaxMzDeviation(cliInputBean.getMaxMzDeviation());
idFilter.setIsPpm(cliInputBean.isMaxMassDeviationPpm());
idFilter.setRemoveUnknownPTMs(cliInputBean.excludeUnknownPTMs());
// set the processing settings: PSM, peptide and protein FDR values as well as the
// protein confidence MW plots value, all taken from the command line input
processingPreferences = new ProcessingPreferences();
processingPreferences.setPsmFDR(cliInputBean.getPsmFDR());
processingPreferences.setPeptideFDR(cliInputBean.getPeptideFDR());
processingPreferences.setProteinFDR(cliInputBean.getProteinFDR());
processingPreferences.setProteinConfidenceMwPlots(cliInputBean.getProteinConfidenceMwPlots());
// set the PTM scoring preferences
ptmScoringPreferences = new PTMScoringPreferences();
if (cliInputBean.getPtmScore() != null) {
ptmScoringPreferences.setProbabilitsticScoreCalculation(true);
ptmScoringPreferences.setSelectedProbabilisticScore(cliInputBean.getPtmScore());
ptmScoringPreferences.setProbabilisticScoreNeutralLosses(cliInputBean.isaScoreNeutralLosses());
if (cliInputBean.getPtmScoreThreshold() != null) {
ptmScoringPreferences.setEstimateFlr(false);
ptmScoringPreferences.setProbabilisticScoreThreshold(cliInputBean.getPtmScoreThreshold());
} else {
ptmScoringPreferences.setEstimateFlr(true);
}
} else {
ptmScoringPreferences.setProbabilitsticScoreCalculation(false);
}
// set the gene preferences (only when a species was given on the command line)
if (cliInputBean.getSpecies() != null) {
    try {
        genePreferences = new GenePreferences();
        genePreferences.loadSpeciesAndGoDomains();
        genePreferences.setCurrentSpecies(cliInputBean.getSpecies());
        genePreferences.setCurrentSpeciesType(cliInputBean.getSpeciesType());

        // find out which Ensembl version is the latest one for this species type;
        // the "Vertebrates" type is looked up under the name "ensembl"
        GeneFactory ensemblFactory = GeneFactory.getInstance();
        String ensemblType = cliInputBean.getSpeciesType().toLowerCase();
        if (ensemblType.equalsIgnoreCase("Vertebrates")) {
            ensemblType = "ensembl";
        }
        Integer newestVersion = ensemblFactory.getCurrentEnsemblVersion(ensemblType);

        // find out which version, if any, is already known for the selected species
        String species = cliInputBean.getSpecies();
        String ensemblDb = genePreferences.getEnsemblDatabaseName(cliInputBean.getSpeciesType(), species);
        String localVersion = genePreferences.getEnsemblVersion(ensemblDb);

        // decide whether the gene and GO mappings have to be downloaded
        boolean mappingsNeeded;
        if (!cliInputBean.updateSpecies()) {
            // updates not allowed: missing mappings cancel the run
            if (localVersion == null) {
                waitingHandler.appendReport("Species and GO mappings where not found for " + species + "! Download manually or use the species_update option.", true, true);
                waitingHandler.setRunCanceled();
            }
            mappingsNeeded = false;
        } else if (localVersion == null) {
            // updates allowed and nothing available locally
            mappingsNeeded = true;
        } else {
            // updates allowed and mappings available: compare the versions
            mappingsNeeded = checkForSpeciesUpdate(localVersion, newestVersion);
        }

        // replace the old mappings by freshly downloaded ones if needed
        if (mappingsNeeded) {
            genePreferences.clearOldMappings(cliInputBean.getSpeciesType(), species, true);
            genePreferences.downloadMappings(waitingHandler, cliInputBean.getSpeciesType(), species, true);
        }
    } catch (IOException e) {
        System.out.println("Failed to load the species and GO domains!");
        e.printStackTrace();
    }
}
// set the spectrum counting preferences (default constructed, nothing is taken from the command line here)
spectrumCountingPreferences = new SpectrumCountingPreferences();
// set the annotation preferences, initialized from the search parameters
annotationPreferences = new AnnotationPreferences();
annotationPreferences.setPreferencesFromSearchParameters(searchParameters);
// register NH3 and H2O as default neutral losses in the ion factory singleton
IonFactory.getInstance().addDefaultNeutralLoss(NeutralLoss.NH3);
IonFactory.getInstance().addDefaultNeutralLoss(NeutralLoss.H2O);
// create a shaker which will perform the analysis
PeptideShaker peptideShaker = new PeptideShaker(experiment, sample, replicateNumber);
// import the files, handing over the file lists and every preference object set up above
// NOTE(review): the meaning of the trailing 'false' argument is not visible here - check importFiles
peptideShaker.importFiles(waitingHandler, idFilter, identificationFiles, spectrumFiles, searchParameters,
annotationPreferences, projectDetails, processingPreferences, ptmScoringPreferences,
spectrumCountingPreferences, sequenceMatchingPreferences, false);
// show the warnings: print the message of every feedback entry of type WARNING
for (FeedBack feedBack : peptideShaker.getWarnings().values()) {
    if (feedBack.getType() == FeedBack.FeedBackType.WARNING) {
        // @TODO: better interaction between notes and feedback objects...
        System.out.println(feedBack.getMessage());
    }
}
if (!waitingHandler.isRunCanceled()) {
// identification as created by PeptideShaker
ProteomicAnalysis tempProteomicAnalysis = experiment.getAnalysisSet(sample).getProteomicAnalysis(replicateNumber);
identification = tempProteomicAnalysis.getIdentification(IdentificationMethod.MS2_IDENTIFICATION);
// metrics saved while processing the data
metrics = peptideShaker.getMetrics();
// the identification feature generator