// Package org.apache.ctakes.ytex.kernel
//
// Source code of org.apache.ctakes.ytex.kernel.SparseDataExporterImpl

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;

import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.InvalidPropertiesFormatException;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

import javax.sql.DataSource;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowCallbackHandler;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.TransactionCallback;
import org.springframework.transaction.support.TransactionTemplate;

public class SparseDataExporterImpl implements SparseDataExporter {

  /** Class-wide commons-logging logger; used for progress messages while exporting. */
  private static final Log log = LogFactory
      .getLog(SparseDataExporterImpl.class);

  @SuppressWarnings("static-access")
  public static void main(String args[]) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder
        .withArgName("prop")
        .hasArg()
        .isRequired()
        .withDescription(
            "property file with queries and other parameters.")
        .create("prop"));
    options.addOption(OptionBuilder.withArgName("type").hasArg()
        .isRequired()
        .withDescription("export format; valid values: weka, libsvm")
        .create("type"));
    if (args.length == 0)
      printHelp(options);
    else {
      try {
        CommandLineParser parser = new GnuParser();
        CommandLine line = parser.parse(options, args);
        String propFile = line.getOptionValue("prop");
        String format = line.getOptionValue("type");
        SparseDataExporter exporter = KernelContextHolder
            .getApplicationContext().getBean(
                SparseDataExporter.class);
        exporter.exportData(propFile, format);
      } catch (ParseException pe) {
        printHelp(options);
      }
    }
  }

  private static void printHelp(Options options) {
    HelpFormatter formatter = new HelpFormatter();

    formatter.printHelp("java " + SparseDataExporterImpl.class.getName()
        + " export sparse data", options);
  }

  // JDBC template created in setDataSource; getDataSource reads its data source.
  protected JdbcTemplate jdbcTemplate;
  // Helper used to load properties and instances and to generate folds.
  protected KernelUtil kernelUtil;

  // Named-parameter JDBC template; runs the prepare script and the word queries.
  protected NamedParameterJdbcTemplate namedJdbcTemplate;

  // Export format name -> factory of the formatter for that format; looked up
  // with the lower-cased format name in exportData(String, String).
  protected Map<String, SparseDataFormatterFactory> nameToFormatterMap = new HashMap<String, SparseDataFormatterFactory>();

  // Created in setDataSource; not referenced elsewhere in this class.
  protected SimpleJdbcTemplate simpleJdbcTemplate;

  // Transaction template in which the prepare script and each word query run.
  // NOTE(review): the name suggests a "requires new" propagation - confirm in
  // the Spring configuration.
  protected TransactionTemplate txTemplateNew;

  public SparseDataExporterImpl() {
    super();
  }

  protected void addNominalWordToInstance(SparseData sparseData,
      long instanceId, String word, String wordValue) {
    // add the instance id to the set of instance ids if necessary
    if (!sparseData.getInstanceIds().contains(instanceId))
      sparseData.getInstanceIds().add(instanceId);
    SortedMap<String, String> instanceWords = sparseData
        .getInstanceNominalWords().get(instanceId);
    SortedSet<String> wordValueSet = sparseData.getNominalWordValueMap()
        .get(word);
    if (instanceWords == null) {
      instanceWords = new TreeMap<String, String>();
      sparseData.getInstanceNominalWords().put(instanceId, instanceWords);
    }
    if (wordValueSet == null) {
      wordValueSet = new TreeSet<String>();
      sparseData.getNominalWordValueMap().put(word, wordValueSet);
    }
    // add the word-value for the instance
    instanceWords.put(word, wordValue);
    // add the value to the set of valid values
    wordValueSet.add(wordValue);
  }

  protected void addNumericWordToInstance(SparseData sparseData,
      long instanceId, String word, double wordValue) {
    // add the instance id to the set of instance ids if necessary
    if (!sparseData.getInstanceIds().contains(instanceId))
      sparseData.getInstanceIds().add(instanceId);
    // add the numeric word to the map of words for this document
    SortedMap<String, Double> words = sparseData.getInstanceNumericWords()
        .get(instanceId);
    if (words == null) {
      words = new TreeMap<String, Double>();
      sparseData.getInstanceNumericWords().put(instanceId, words);
    }
    words.put(word, wordValue);
    sparseData.getNumericWords().add(word);
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.ctakes.ytex.kernel.SparseDataExporter#exportData(org.apache.ctakes.ytex.kernel.SparseData,
   * org.apache.ctakes.ytex.kernel.SparseDataFormatter, java.util.Properties)
   */
  public void exportData(InstanceData instanceLabel,
      SparseDataFormatter formatter, Properties properties,
      BagOfWordsDecorator bDecorator) throws IOException {
    String scope = properties.getProperty("scope", null);
    SparseData sparseData = null;
    if (scope == null) {
      sparseData = this.loadData(instanceLabel,
          properties.getProperty("numericWordQuery"),
          properties.getProperty("nominalWordQuery"),
          properties.getProperty("prepareScript"),
          properties.getProperty("prepareScriptDelimiter", ";"),
          bDecorator, null, null, null);
    }
    formatter.initializeExport(instanceLabel, properties, sparseData);
    for (String label : instanceLabel.getLabelToInstanceMap().keySet()) {
      if ("label".equals(scope)) {
        sparseData = this.loadData(instanceLabel,
            properties.getProperty("numericWordQuery"),
            properties.getProperty("nominalWordQuery"),
            properties.getProperty("prepareScript"),
            properties.getProperty("prepareScriptDelimiter", ";"),
            bDecorator, label, null, null);
      }
      formatter
          .initializeLabel(label, instanceLabel
              .getLabelToInstanceMap().get(label), properties,
              sparseData);
      for (int run : instanceLabel.getLabelToInstanceMap().get(label)
          .keySet()) {
        for (int fold : instanceLabel.getLabelToInstanceMap()
            .get(label).get(run).keySet()) {
          if (log.isInfoEnabled()
              && (label.length() > 0 || run > 0 || fold > 0))
            log.info("exporting, label " + label + " run " + run
                + " fold " + fold);
          if ("fold".equals(scope)) {
            sparseData = this.loadData(instanceLabel, properties
                .getProperty("numericWordQuery"), properties
                .getProperty("nominalWordQuery"), properties
                .getProperty("prepareScript"), properties
                .getProperty("prepareScriptDelimiter", ";"),
                bDecorator, label, fold, run);
          }
          formatter.initializeFold(sparseData, label, run, fold,
              instanceLabel.getLabelToInstanceMap().get(label)
                  .get(run).get(fold));
          for (boolean train : instanceLabel.getLabelToInstanceMap()
              .get(label).get(run).get(fold).keySet()) {
            formatter.exportFold(sparseData, instanceLabel
                .getLabelToInstanceMap().get(label).get(run)
                .get(fold).get(train), train, label,
                0 == run ? null : run, 0 == fold ? null : fold);
          }
          formatter.clearFold();
        }
      }
      formatter.clearLabel();
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.ctakes.ytex.kernel.SparseDataExporter#exportData(java.util.Properties,
   * org.apache.ctakes.ytex.kernel.SparseDataFormatter, org.apache.ctakes.ytex.kernel.BagOfWordsDecorator)
   */
  @Override
  public void exportData(Properties props, SparseDataFormatter formatter,
      BagOfWordsDecorator bDecorator) throws IOException {
    InstanceData instanceLabel = this.getKernelUtil().loadInstances(
        props.getProperty("instanceClassQuery"));
    if (props.containsKey("folds")) {
      this.getKernelUtil().generateFolds(instanceLabel, props);
    }
    // load label - instance id maps
    // sparseData.setLabelToInstanceMap(this.getKernelUtil().loadInstances(
    // props.getProperty("instanceClassQuery"),
    // sparseData.getLabelToClassMap()));
    this.exportData(instanceLabel, formatter, props, bDecorator);
    // this.loadData(sparseData,
    // props.getProperty("numericWordQuery"),
    // props.getProperty("nominalWordQuery"), bDecorator);
    // this.exportData(sparseData, formatter, props);
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.ctakes.ytex.kernel.SparseDataExporter#exportData(java.lang.String,
   * java.lang.String)
   */
  @Override
  public void exportData(String propertiesFile, String format)
      throws IOException, InvalidPropertiesFormatException {
    Properties props = new Properties();
    this.getKernelUtil().loadProperties(propertiesFile, props);
    this.exportData(props, nameToFormatterMap.get(format.toLowerCase())
        .getFormatter(), null);
  }

  public DataSource getDataSource(DataSource ds) {
    return this.jdbcTemplate.getDataSource();
  }

  public KernelUtil getKernelUtil() {
    return kernelUtil;
  }

  public Map<String, SparseDataFormatterFactory> getNameToFormatterMap() {
    return nameToFormatterMap;
  }

  /**
   * run the prepare script if defined.
   *
   * @param prepareScript
   *            sequence of sql statements to be executed with named params.
   * @param prepareScriptDelimiter
   *            delimiter separating the sql statements.
   * @param params
   *            for named parameters in sql statements.
   */
  protected void prepare(final String prepareScript,
      final String prepareScriptDelimiter,
      final Map<String, Object> params) {
    if (prepareScript != null && prepareScript.length() > 0) {
      String[] statements = prepareScript.split(prepareScriptDelimiter);
      // throw out empty lines
      for (String sql : statements) {
        if (sql != null && sql.trim().length() > 0) {
          this.namedJdbcTemplate.update(sql, params);
        }
      }
    }
  }

  /**
   *
   * @param sql
   *            result set has 3 columns. 1st column - integer - instance id.
   *            2nd column - word. 3rd column - word value.
   * @param instanceWordMap
   *            map of instance id to word-word value.
   * @param wordValueMap
   *            map of word to valid values for the word.
   * @return populate maps with results of query.
   */
  protected void getNominalInstanceWords(final String sql,
      final String prepareScript, final String prepareScriptDelimiter,
      final SparseData sparseData, final Map<String, Object> params) {
    txTemplateNew.execute(new TransactionCallback<Object>() {

      // new PreparedStatementCreator() {
      // @Override

      // public PreparedStatement createPreparedStatement(
      // Connection conn) throws SQLException {
      // return conn.prepareStatement(sql,
      // ResultSet.TYPE_FORWARD_ONLY,
      // ResultSet.CONCUR_READ_ONLY);
      // }
      //
      // } @Override
      public Object doInTransaction(TransactionStatus txStatus) {
        prepare(prepareScript, prepareScriptDelimiter, params);
        namedJdbcTemplate.query(sql, params, new RowCallbackHandler() {

          @Override
          public void processRow(ResultSet rs) throws SQLException {
            long instanceId = rs.getLong(1);
            String word = rs.getString(2);
            String wordValue = rs.getString(3);
            addNominalWordToInstance(sparseData, instanceId, word,
                wordValue);
          }
        });
        return null;
      }
    });
  }

  /**
   *
   * @param sql
   *            result 1st column: instance id, 2nd column: word, 3rd column:
   *            numeric word value
   * @param instanceNumericWords
   *            map of instance id - [map word - word value] to be populated
   */
  protected void getNumericInstanceWords(final String sql,
      final String prepareScript, final String prepareScriptDelimiter,
      final SparseData sparseData, final Map<String, Object> params) {
    txTemplateNew.execute(new TransactionCallback<Object>() {

      @Override
      public Object doInTransaction(TransactionStatus txStatus) {
        prepare(prepareScript, prepareScriptDelimiter, params);
        namedJdbcTemplate.query(sql, params
        // new PreparedStatementCreator() {
        //
        // @Override
        // public PreparedStatement createPreparedStatement(
        // Connection conn) throws SQLException {
        // return conn.prepareStatement(sql,
        // ResultSet.TYPE_FORWARD_ONLY,
        // ResultSet.CONCUR_READ_ONLY);
        // }
        //
        // }
            , new RowCallbackHandler() {

              @Override
              public void processRow(ResultSet rs)
                  throws SQLException {
                long instanceId = rs.getLong(1);
                String word = rs.getString(2);
                double wordValue = rs.getDouble(3);
                addNumericWordToInstance(sparseData,
                    instanceId, word, wordValue);
              }
            });
        return null;
      }

    });
  }

  public TransactionTemplate getTxTemplateNew() {
    return txTemplateNew;
  }

  /**
   *
   * @param instanceLabel
   *            instance data: label - fold - instance id - class map
   * @param instanceNumericWordQuery
   *            query to get numeric attributes
   * @param instanceNominalWordQuery
   *            query to get nominal attributes
   * @param prepareScript
   *            prepare script to be executed in same tx as instance attribute
   *            queries
   * @param prepareScriptDelimiter
   *            delimiter for statements in prepare script
   * @param bDecorator
   *            decorator to add attributes
   * @param label
   * @param fold
   * @param run
   * @return
   */
  protected SparseData loadData(InstanceData instanceLabel,
      String instanceNumericWordQuery, String instanceNominalWordQuery,
      String prepareScript, String prepareScriptDelimiter,
      BagOfWordsDecorator bDecorator, String label, Integer fold,
      Integer run) {
    SparseData sparseData = new SparseData();
    Map<String, Object> params = new HashMap<String, Object>();
    if (label != null && label.length() > 0)
      params.put("label", label);
    if (fold != null && fold != 0)
      params.put("fold", fold);
    if (run != null && run != 0)
      params.put("run", run);
    // load numeric attributes
    if (instanceNumericWordQuery != null
        && instanceNumericWordQuery.trim().length() > 0)
      this.getNumericInstanceWords(instanceNumericWordQuery,
          prepareScript, prepareScriptDelimiter, sparseData, params);
    // added to support adding gram matrix index in GramMatrixExporter
    if (bDecorator != null)
      bDecorator.decorateNumericInstanceWords(
          sparseData.getInstanceNumericWords(),
          sparseData.getNumericWords());
    // load nominal attributes
    if (instanceNominalWordQuery != null
        && instanceNominalWordQuery.trim().length() > 0)
      this.getNominalInstanceWords(instanceNominalWordQuery,
          prepareScript, prepareScriptDelimiter, sparseData, params);
    if (bDecorator != null)
      bDecorator.decorateNominalInstanceWords(
          sparseData.getInstanceNominalWords(),
          sparseData.getNominalWordValueMap());
    return sparseData;
  }

  public void setDataSource(DataSource ds) {
    this.jdbcTemplate = new JdbcTemplate(ds);
    this.simpleJdbcTemplate = new SimpleJdbcTemplate(ds);
    this.namedJdbcTemplate = new NamedParameterJdbcTemplate(ds);
  }

  public void setKernelUtil(KernelUtil kernelUtil) {
    this.kernelUtil = kernelUtil;
  }

  public void setNameToFormatterMap(
      Map<String, SparseDataFormatterFactory> nameToFormatterMap) {
    this.nameToFormatterMap = nameToFormatterMap;
  }

  public void setTxTemplateNew(TransactionTemplate txTemplateNew) {
    this.txTemplateNew = txTemplateNew;
  }
}
// TOP
//
// Related classes of org.apache.ctakes.ytex.kernel.SparseDataExporterImpl
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.