Package com.jgaap.backend

Source Code of com.jgaap.backend.ExperimentEngine$Experiment

/*
* JGAAP -- a graphical program for stylometric authorship attribution
* Copyright (C) 2009,2011 by Patrick Juola
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package com.jgaap.backend;

import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.log4j.Logger;

import com.jgaap.JGAAPConstants;
import com.jgaap.generics.AnalysisDriver;
import com.jgaap.generics.EventDriver;
import com.jgaap.generics.ValidationDriver;
import com.jgaap.util.Document;

/**
* Experiment Engine. This class takes a CSV file of experiments, runs them
* one after the other, and generates result files in the tmp directory.
*
* @author Mike Ryan
*/
public class ExperimentEngine {

  static Logger logger = Logger.getLogger(ExperimentEngine.class);

  private static final int workers = 1;
 
  private static String language = "english";

  /**
   * This method generates unique file names and a directory structure to save
   * the results of an experiment run
   *
   * @param canons
   *            the canonicizors used
   * @param events
   *            the events used
   * @param analysis
   *            the analysis method or distance function used
   * @param experimentName
   *            the given name of this experiment specified on the top line of
   *            the experiment csv file
   * @param number
   *            the identifier given to this experiment
   * @return the location of where the file will be written
   */
  public static String fileNameGen(List<String> canons, String[] events, String analysis, String experimentName,
      String number) {
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    Date date = new Date();
    Iterator<String> iterator = canons.iterator();
    StringBuilder canonNameBuilder = new StringBuilder();
    while (iterator.hasNext()) {
      canonNameBuilder.append(iterator.next().trim()).append(" ");
    }
    String canonName = canonNameBuilder.toString().trim();
    if (canonName.isEmpty())
      canonName = "none";
    iterator = Arrays.asList(events).iterator();
    StringBuilder eventNameBuilder = new StringBuilder();
    while (iterator.hasNext()) {
      eventNameBuilder.append(iterator.next().trim()).append(" ");
    }
    String eventName = eventNameBuilder.toString().trim();
    String path = JGAAPConstants.JGAAP_TMPDIR + canonName.replace("/", "\\/") + JGAAPConstants.separator
        + eventName.trim().replace("/", "\\/") + JGAAPConstants.separator + analysis.trim().replace("/", "\\/") + JGAAPConstants.separator;
    File file = new File(path);
    boolean newDirs = file.mkdirs();
    if (!newDirs) {
      ; // Nothing (check added to satisfy static analysis / show we are
        // aware of this)
    }
    return (path + experimentName + number + dateFormat.format(date) + ".txt");
  }

  /**
   * This method will iterate a the rows of a csv file of experiments running
   * jgaap on each one and then generate a results file for it
   *
   * @param listPath
   *            the location of the csv file of experiments
   */

  public static void runExperiment(String listPath, String lang) {
    try {
      runExperiment(CSVIO.readCSV(listPath), lang);
    } catch (IOException e) {
      logger.fatal("Problem processing experiment file: " + listPath, e);
    }

  }

  public static void runExperiment(List<List<String>> experimentTable, String lang) {
    if (lang != null)
      language = lang;
    final String experimentName = experimentTable.remove(0).get(0);
    ExecutorService experimentExecutor = Executors.newFixedThreadPool(workers);
    List<Future<String>> runningExperiments = new ArrayList<Future<String>>(experimentTable.size());
    for (final List<String> experimentRow : experimentTable) {
      if (experimentRow.isEmpty()) {
        continue;
      } else if (experimentRow.size() >= 6) {
        String number = experimentRow.get(0);
        String[] canonicizers = experimentRow.get(1).trim().split("\\s*&\\s*");
        String[] events = experimentRow.get(2).trim().split("\\s*&\\s*");
        String analysis = experimentRow.get(3).trim();
        String distance = experimentRow.get(4).trim();
        String documentsPath = experimentRow.get(5).trim();
        String fileName = fileNameGen(Arrays.asList(canonicizers), events, analysis
            + (distance.isEmpty() ? "" : "-" + distance), experimentName, number);

        runningExperiments.add(experimentExecutor.submit(new Experiment(canonicizers, events, analysis,
            distance, documentsPath, fileName)));
      } else {
        logger.error("Experiment " + experimentRow.toString() + " missing " + (6 - experimentRow.size())
            + " column(s)");
      }
    }
    experimentExecutor.shutdown();

    while (!experimentExecutor.isTerminated()) {
      Iterator<Future<String>> iterator = runningExperiments.iterator();
      while (iterator.hasNext()) {
        Future<String> current = iterator.next();
        if (current.isDone()) {
          try {
            logger.info("Experiment: " + current.get() + " has finished.");
          } catch (InterruptedException e) {
            logger.error("Problem printing experiment completion", e);
          } catch (ExecutionException e) {
            logger.error("Problem printing experiment completion", e);
          }
          iterator.remove();
          System.gc(); //I know this is terrible and should be removed MVR (Dear Future Me, Please forgive me.)
        }
      }
    }
  }

  private static class Experiment implements Callable<String> {

    private String[] canonicizers;
    private String[] events;
    private String analysis;
    private String distance;
    private String documentsPath;
    private String fileName;

    public Experiment(String[] canonicizers, String[] events, String analysis, String distance,
        String documentsPath, String fileName) {
      this.canonicizers = canonicizers;
      this.events = events;
      this.analysis = analysis;
      this.distance = distance;
      this.documentsPath = documentsPath;
      this.fileName = fileName;
    }

    @Override
    public String call() throws Exception {
      API experiment = API.getPrivateInstance();
      experiment.setLanguage(language);
      try {
        List<List<String>> tmp;
        if (documentsPath.startsWith(JGAAPConstants.JGAAP_RESOURCE_PACKAGE)) {
          tmp = CSVIO.readCSV(com.jgaap.JGAAP.class.getResourceAsStream(documentsPath));
        } else {
          tmp = CSVIO.readCSV(documentsPath);
        }
        List<Document> documents = Utils.getDocumentsFromCSV(tmp);
        for (Document document : documents) {
          experiment.addDocument(document);
        }
        for (String canonicizer : canonicizers) {
          if (!canonicizer.isEmpty())
            experiment.addCanonicizer(canonicizer);
        }
        for (String event : events) {
          String[] canons = null;
          String[] cullers = null;
          String[] splitCanon = event.split("@", 2);
          if(splitCanon.length > 1){
            event = splitCanon[0];
            String[] splitCuller = splitCanon[1].split("#",2);
            canons = splitCuller[0].split("@");
            if(splitCuller.length > 1){
              cullers = splitCuller[1].split("#");
            }
          } else {
            String[] splitCuller = event.split("#",2);
            if(splitCuller.length > 1){
              event = splitCuller[0];
              cullers = splitCuller[1].split("#");
            }
          }
          EventDriver eventDriver = experiment.addEventDriver(event.trim());
          if(canons != null) {
            for(String canon : canons) {
              experiment.addCanonicizer(canon.trim(), eventDriver);
            }
          }
          if(cullers != null) {
            for(String culler : cullers) {
              experiment.addEventCuller(culler.trim(), eventDriver);
            }
          }
        }
        AnalysisDriver analysisDriver = experiment.addAnalysisDriver(analysis);
        if (!distance.isEmpty()) {
          experiment.addDistanceFunction(distance, analysisDriver);
        }
        experiment.execute();
        List<Document> resultDocuments;
        if (analysisDriver instanceof ValidationDriver) {
          resultDocuments = experiment.getDocuments();
        } else {
          resultDocuments = experiment.getUnknownDocuments();
        }
        Path filePath = FileSystems.getDefault().getPath(fileName);
        Writer writer = Files.newBufferedWriter(filePath, Charset.defaultCharset(), StandardOpenOption.CREATE);
        for (Document resultDocument : resultDocuments) {
          writer.append(resultDocument.getFormattedResult(analysisDriver));
        }
        writer.close();
      } catch (Exception e) {
        logger.error("Could not run experiment " + fileName, e);
      }
      return fileName;
    }

  }

}
TOP

Related Classes of com.jgaap.backend.ExperimentEngine$Experiment

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.