Package org.apache.ctakes.ytex.R

Source Code of org.apache.ctakes.ytex.R.RGramMatrixExporterImpl

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.R;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Properties;
import java.util.SortedSet;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.ctakes.ytex.kernel.FileUtil;
import org.apache.ctakes.ytex.kernel.InstanceData;
import org.apache.ctakes.ytex.kernel.KernelContextHolder;
import org.apache.ctakes.ytex.kernel.KernelUtil;
import org.apache.ctakes.ytex.kernel.dao.KernelEvaluationDao;
import org.apache.ctakes.ytex.sparsematrix.InstanceDataExporter;


public class RGramMatrixExporterImpl implements RGramMatrixExporter {
  private static final Log log = LogFactory.getLog(RGramMatrixExporter.class);

  @SuppressWarnings("static-access")
  public static void main(String args[]) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder
        .withArgName("prop")
        .hasArg()
        .isRequired()
        .withDescription(
            "property file with queries and other kernel parameters")
        .create("prop"));
    try {
      CommandLineParser parser = new GnuParser();
      CommandLine line = parser.parse(options, args);
      RGramMatrixExporter exporter = (RGramMatrixExporter) KernelContextHolder
          .getApplicationContext().getBean(RGramMatrixExporter.class);
      exporter.exportGramMatrix(FileUtil.loadProperties(
          line.getOptionValue("prop"), true));
    } catch (ParseException pe) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("java " + RGramMatrixExporterImpl.class.getName()
          + " export gram matrix for use in R/Matlab", options);
    }
  }

  private InstanceDataExporter instanceDataExporter;
  private KernelEvaluationDao kernelEvaluationDao;

  private KernelUtil kernelUtil;

  private void exportGramMatrices(String name, String experiment,
      double param1, String param2, String splitName, String scope,
      String outdir, InstanceData instanceData) throws IOException {
    if (scope == null || scope.length() == 0) {
      exportGramMatrix(name, experiment, param1, param2, splitName,
          outdir, instanceData, null, 0, 0);
    } else {
      for (String label : instanceData.getLabelToInstanceMap().keySet()) {
        if ("label".equals(scope)) {
          exportGramMatrix(name, experiment, param1, param2,
              splitName, outdir, instanceData, label, 0, 0);
        } else if ("fold".equals(scope)) {
          for (int run : instanceData.getLabelToInstanceMap()
              .get(label).keySet()) {
            for (int fold : instanceData.getLabelToInstanceMap()
                .get(label).get(run).keySet()) {
              exportGramMatrix(name, experiment, param1, param2,
                  splitName, outdir, instanceData, label,
                  run, fold);
            }
          }
        }
      }
    }
  }

  private void exportGramMatrix(String name, String experiment,
      double param1, String param2, String splitName, String outdir,
      InstanceData instanceData, String label, int run, int fold)
      throws IOException {
    SortedSet<Long> instanceIds = instanceData.getAllInstanceIds(label,
        run, fold);
    String filePrefix = FileUtil.getDataFilePrefix(outdir, label, run,
        fold, null);
    double[][] gramMatrix = kernelUtil.loadGramMatrix(instanceIds, name,
        splitName, experiment, label, run, fold, param1, param2);
    if (gramMatrix != null)
      outputGramMatrix(gramMatrix, instanceIds, filePrefix);
  }

  // private KernelEvaluation getKernelEval(String name, String splitName,
  // String experiment, String label, int run, int fold, double param1,
  // String param2) {
  // int foldId = 0;
  // if (run != 0 && fold != 0) {
  // CrossValidationFold f = this.classifierEvaluationDao
  // .getCrossValidationFold(name, splitName, label, run, fold);
  // if (f != null)
  // foldId = f.getCrossValidationFoldId();
  // }
  // KernelEvaluation kEval = this.kernelEvaluationDao.getKernelEval(name,
  // experiment, label, foldId, param1, param2);
  // if (kEval == null) {
  // log.warn("could not find kernelEvaluation.  name=" + name
  // + ", experiment=" + experiment + ", label=" + label
  // + ", fold=" + fold + ", run=" + run);
  // }
  // return kEval;
  // }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.ctakes.ytex.R.RGramMatrixExporter#exportGramMatrix(java.util.Properties)
   */
  @Override
  public void exportGramMatrix(Properties props) throws IOException {
    String name = props.getProperty("org.apache.ctakes.ytex.corpusName");
    String splitName = props.getProperty("org.apache.ctakes.ytex.splitName");
    String experiment = props.getProperty("org.apache.ctakes.ytex.experiment");
    String param2 = props.getProperty("org.apache.ctakes.ytex.param2");
    double param1 = Double.parseDouble(props
        .getProperty("org.apache.ctakes.ytex.param1", "0"));
    String scope = props.getProperty("scope");
    String outdir = props.getProperty("outdir");
    InstanceData instanceData = this.getKernelUtil().loadInstances(
        props.getProperty("instanceClassQuery"));
    exportGramMatrices(name, experiment, param1, param2, splitName, scope,
        outdir, instanceData);
    instanceDataExporter.outputInstanceData(instanceData,
        FileUtil.addFilenameToDir(outdir, "instance.txt"));
  }

  //
  // private void exportGramMatrix(String name, String experiment,
  // double param1, String param2, String outdir,
  // InstanceData instanceData, String label, int foldId)
  // throws IOException {
  // SortedSet<Integer> instanceIds = getAllInstanceIdsForLabel(
  // instanceData, label);
  // double[][] gramMatrix = new double[instanceIds.size()][instanceIds
  // .size()];
  // KernelEvaluation kernelEval = this.kernelEvaluationDao.getKernelEval(
  // name, experiment, label, 0, param1, param2);
  // if (kernelEval != null) {
  // kernelUtil.fillGramMatrix(kernelEval, instanceIds, gramMatrix,
  // null, null);
  // outputInstanceData(instanceData, label, outdir);
  // outputGramMatrix(kernelEval, gramMatrix, instanceIds,
  // FileUtil.getDataFilePrefix(outdir, label, 0, 0, null));
  // } else {
  // log.info("no kernel eval for label=" + label);
  // }
  //
  // }

  // /**
  // * get all instance ids for the specified label
  // *
  // * @param instanceData
  // * @param label
  // * @return
  // */
  // private SortedSet<Integer> getAllInstanceIdsForLabel(
  // InstanceData instanceData, String label) {
  // SortedSet<Integer> instanceIds = new TreeSet<Integer>();
  // for (int run : instanceData.getLabelToInstanceMap().get(label).keySet())
  // {
  // for (int fold : instanceData.getLabelToInstanceMap().get(label)
  // .get(run).keySet()) {
  // for (SortedMap<Integer, String> instanceLabelMap : instanceData
  // .getLabelToInstanceMap().get(label).get(run).get(fold)
  // .values()) {
  // instanceIds.addAll(instanceLabelMap.keySet());
  // }
  // }
  // }
  // return instanceIds;
  // }

  public InstanceDataExporter getInstanceDataExporter() {
    return instanceDataExporter;
  }

  public KernelEvaluationDao getKernelEvaluationDao() {
    return kernelEvaluationDao;
  }

  public KernelUtil getKernelUtil() {
    return kernelUtil;
  }

  private void outputGramMatrix(double[][] gramMatrix,
      SortedSet<Long> instanceIds, String dataFilePrefix)
      throws IOException {
    BufferedWriter w = null;
    BufferedWriter wId = null;
    try {
      w = new BufferedWriter(new FileWriter(dataFilePrefix + "data.txt"));
      wId = new BufferedWriter(new FileWriter(dataFilePrefix
          + "instance_id.txt"));
      Long instanceIdArray[] = instanceIds.toArray(new Long[] {});
      // write instance id corresponding to row
      for (int h = 0; h < instanceIdArray.length; h++) {
        wId.write(Long.toString(instanceIdArray[h]));
        wId.write("\n");
      }
      for (int i = 0; i < instanceIdArray.length; i++) {
        // write line from gram matrix
        for (int j = 0; j < instanceIdArray.length; j++) {
          w.write(Double.toString(gramMatrix[i][j]));
          if (j < instanceIdArray.length - 1)
            w.write(" ");
        }
        w.write("\n");
      }
    } finally {
      if (w != null) {
        w.close();
      }
      if (wId != null) {
        wId.close();
      }
    }
  }

  public void setInstanceDataExporter(
      InstanceDataExporter instanceDataExporter) {
    this.instanceDataExporter = instanceDataExporter;
  }

  public void setKernelEvaluationDao(KernelEvaluationDao kernelEvaluationDao) {
    this.kernelEvaluationDao = kernelEvaluationDao;
  }

  public void setKernelUtil(KernelUtil kernelUtil) {
    this.kernelUtil = kernelUtil;
  }

  // private void exportGramMatrices(String name, String experiment,
  // String outdir, String instanceQuery) throws IOException {
  // SortedMap<Integer, SortedMap<Boolean, SortedMap<Integer, Integer>>>
  // instanceFolds = new TreeMap<Integer, SortedMap<Boolean,
  // SortedMap<Integer, Integer>>>();
  // SortedMap<String, SortedMap<Integer, String>> instanceLabels = new
  // TreeMap<String, SortedMap<Integer, String>>();
  //
  // }
  //
  // private void exportLabel(String name, String experiment, String outdir) {
  // }
  //
  // private static class InstanceFoldData {
  // SortedMap<Boolean, SortedMap<Integer, Integer>> folds;
  //
  // public void addEntry(boolean train, int fold, int run) {
  // SortedMap<Integer, Integer> foldToRun = folds.get(train);
  // if (foldToRun == null) {
  // if (fold != 0) {
  // foldToRun = new TreeMap<Integer, Integer>();
  // foldToRun.put(fold, run);
  // }
  // }
  // folds.put(train, foldToRun);
  // }
  // }
  //
  // private void loadInstanceData(
  // String strQuery,
  // final SortedMap<Integer, SortedMap<Boolean, SortedMap<Integer, Integer>>>
  // instanceFolds,
  // final SortedMap<String, SortedMap<Integer, String>> instanceLabels) {
  // jdbcTemplate.query(strQuery, new RowCallbackHandler() {
  //
  // @Override
  // public void processRow(ResultSet rs) throws SQLException {
  // String label = "";
  // int run = 0;
  // int fold = 0;
  // Boolean train = null;
  // int instanceId = rs.getInt(1);
  // String className = rs.getString(2);
  // if (rs.getMetaData().getColumnCount() >= 3)
  // train = rs.getBoolean(3);
  // if (rs.getMetaData().getColumnCount() >= 4)
  // label = rs.getString(4);
  // if (rs.getMetaData().getColumnCount() >= 5)
  // fold = rs.getInt(5);
  // if (rs.getMetaData().getColumnCount() >= 6)
  // run = rs.getInt(6);
  // // set instance className for label
  // SortedMap<Integer, String> instClassName = instanceLabels
  // .get(label);
  // if (instClassName == null) {
  // instClassName = new TreeMap<Integer, String>();
  // instClassName.put(instanceId, labels);
  // }
  // labels.put(label, className);
  // // set fold data
  // if (train != null) {
  // // we split into train/test - save this in the instanceFolds
  // SortedMap<Boolean, SortedMap<Integer, Integer>> folds = instanceFolds
  // .get(instanceId);
  // if (folds == null) {
  // folds = new TreeMap<Boolean, SortedMap<Integer, Integer>>();
  // instanceFolds.put(instanceId, folds);
  // }
  // // we split into folds / runs
  // SortedMap<Integer, Integer> foldToRun = folds.get(train);
  // if (foldToRun == null) {
  // if (fold != 0) {
  // foldToRun = new TreeMap<Integer, Integer>();
  // foldToRun.put(fold, run);
  // }
  // }
  // // add train/test flag
  // // foldToRun is null if we don't have any folds
  // folds.put(train, foldToRun);
  // }
  // }
  // });
  // }
}
TOP

Related Classes of org.apache.ctakes.ytex.R.RGramMatrixExporterImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.