Package org.maltparserx

Source Code of org.maltparserx.MaltParserService

package org.maltparserx;

import java.util.Iterator;

import org.maltparserx.core.exception.MaltChainedException;
import org.maltparserx.core.flow.FlowChartInstance;
import org.maltparserx.core.io.dataformat.ColumnDescription;
import org.maltparserx.core.io.dataformat.DataFormatInstance;
import org.maltparserx.core.io.dataformat.DataFormatSpecification;
import org.maltparserx.core.options.OptionManager;
import org.maltparserx.core.symbol.SymbolTable;
import org.maltparserx.core.symbol.SymbolTableHandler;
import org.maltparserx.core.symbol.trie.TrieSymbolTableHandler;
import org.maltparserx.core.syntaxgraph.DependencyGraph;
import org.maltparserx.core.syntaxgraph.DependencyStructure;
import org.maltparserx.core.syntaxgraph.edge.Edge;
import org.maltparserx.core.syntaxgraph.node.DependencyNode;
import org.maltparserx.parser.SingleMalt;

/**
* The purpose of MaltParserService is to easily write third-party programs that uses MaltParser.
*
*  There are two ways to call the MaltParserService:
*  1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser.
*  2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is
*  done by the third-party program.
*  How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex'
*
* @author Johan Hall
*/
public class MaltParserService {
//  private URL urlMaltJar;
  private Engine engine;
  private FlowChartInstance flowChartInstance;
  private DataFormatInstance dataFormatInstance;
  private SingleMalt singleMalt;
  private int optionContainer;
  private boolean initialized = false;
 
  /**
   * Creates a MaltParserService with the option container 0
   *
   * @throws MaltChainedException
   */
  public MaltParserService() throws MaltChainedException {
    this(0);
  }
 
  /**
   * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program
   * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only
   * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers.
   *
   * @param optionContainer an integer from 0 to max value of data type Integer
   * @throws MaltChainedException
   */
  public MaltParserService(int optionContainer) throws MaltChainedException {
    setOptionContainer(optionContainer);
    initialize();
  }
 
  /**
   * Use this constructor only when you want a MaltParserService without an option manager. Without the option manager MaltParser cannot
   * load or create a parser model.
   *
   * @param optionFreeInitialization true, means that MaltParserService is created without an option manager, false will do the same as MaltParserService().
   * @throws MaltChainedException
   */
  public MaltParserService(boolean optionFreeInitialization) throws MaltChainedException {
    if (optionFreeInitialization == false) {
      setOptionContainer(0);
      initialize();
    } else {
      setOptionContainer(-1);
    }
  }
 
  /**
   * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
   *
   * @param commandLine a commandLine string that controls the MaltParser.
   * @throws MaltChainedException
   */
  public void runExperiment(String commandLine) throws MaltChainedException {
    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
    engine = new Engine();
    engine.initialize(optionContainer);
    engine.process(optionContainer);
    engine.terminate(optionContainer);
  }
 
  /**
   * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
   *
   * @param commandLine a commandLine string that controls the MaltParser
   * @throws MaltChainedException
   */
  public void initializeParserModel(String commandLine) throws MaltChainedException {
    if (optionContainer == -1) {
      throw new MaltChainedException("MaltParserService has been initialized as an option free initialization and therefore no parser model can be initialized.");
    }
    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
    // Creates an engine
    engine = new Engine();
    // Initialize the engine with option container and gets a flow chart instance
    flowChartInstance = engine.initialize(optionContainer);
    // Runs the preprocess chart items of the "parse" flow chart
    if (flowChartInstance.hasPreProcessChartItems()) {
      flowChartInstance.preprocess();
    }
    singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparserx.parser.SingleMalt.class, "singlemalt");
    singleMalt.getConfigurationDir().initDataFormat();
    dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(
        singleMalt.getSymbolTables(),
        OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value"));
    initialized = true;
  }
 

 
  /**
   * Parses an array of tokens and returns a dependency structure.
   *
   * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel().
   *
   * @param tokens an array of tokens
   * @return a dependency structure
   * @throws MaltChainedException
   */
  public DependencyStructure parse(String[] tokens) throws MaltChainedException {
    if (!initialized) {
      throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
    }
    if (tokens == null || tokens.length == 0) {
      throw new MaltChainedException("Nothing to parse. ");
    }

    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
   
    for (int i = 0; i < tokens.length; i++) {
      Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
      DependencyNode node = outputGraph.addDependencyNode(i+1);
      String[] items = tokens[i].split("\t");
      for (int j = 0; j < items.length; j++) {
        if (columns.hasNext()) {
          ColumnDescription column = columns.next();
          if (column.getCategory() == ColumnDescription.INPUT && node != null) {
            outputGraph.addLabel(node, column.getName(), items[j]);
          }
        }
      }
    }
    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
    // Invoke parse with the output graph
    singleMalt.parse(outputGraph);
    return outputGraph;
  }
 
  /**
   * Converts an array of tokens to a dependency structure.
   *
   * Note that this method uses the same data format specification and symbol table as the parser engine. This can cause problem in multi-threaded
   * environment.
   *
   * Please use (in multi-threaded environment)
   * toDependencyStructure(String[] tokens, DataFormatSpecification dataFormatSpecification)
   * or
   * toDependencyStructure(String[] tokens, String dataFormatFileName)
   *
   * @param tokens an array of tokens
   * @return a dependency structure
   * @throws MaltChainedException
   */
  public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException {
    if (!initialized) {
      throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
    }
    if (tokens == null || tokens.length == 0) {
      throw new MaltChainedException("Nothing to convert. ");
    }
    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
   
    for (int i = 0; i < tokens.length; i++) {
      Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
      DependencyNode node = outputGraph.addDependencyNode(i+1);
      String[] items = tokens[i].split("\t");
      Edge edge = null;
      for (int j = 0; j < items.length; j++) {
        if (columns.hasNext()) {
          ColumnDescription column = columns.next();
          if (column.getCategory() == ColumnDescription.INPUT && node != null) {
            outputGraph.addLabel(node, column.getName(), items[j]);
          } else if (column.getCategory() == ColumnDescription.HEAD) {
            if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
              edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
            }
          } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
            outputGraph.addLabel(edge, column.getName(), items[j]);
          }
        }
      }
    }
    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
    return outputGraph;
  }
 
  /**
   * Reads the data format specification file
   *
   * @param dataFormatFileName the path to the data format specification file
   * @return a data format specification
   * @throws MaltChainedException
   */
  public DataFormatSpecification readDataFormatSpecification(String dataFormatFileName) throws MaltChainedException {
    DataFormatSpecification dataFormat = new DataFormatSpecification();
    dataFormat.parseDataFormatXMLfile(dataFormatFileName);
    return dataFormat;
  }
 
  /**
   * Converts an array of tokens to a dependency structure
   *
   * @param tokens tokens an array of tokens
   * @param dataFormatSpecification a data format specification
   * @return a dependency structure
   * @throws MaltChainedException
   */
  public DependencyStructure toDependencyStructure(String[] tokens, DataFormatSpecification dataFormatSpecification) throws MaltChainedException {
    // Creates a symbol table handler
    SymbolTableHandler symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE);
   
    // Initialize data format instance
    DataFormatInstance dataFormatInstance = dataFormatSpecification.createDataFormatInstance(symbolTables, "none");

    // Creates a dependency graph
    if (tokens == null || tokens.length == 0) {
      throw new MaltChainedException("Nothing to convert. ");
    }
    DependencyStructure outputGraph = new DependencyGraph(symbolTables);
   
    for (int i = 0; i < tokens.length; i++) {
      Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
      DependencyNode node = outputGraph.addDependencyNode(i+1);
      String[] items = tokens[i].split("\t");
      Edge edge = null;
      for (int j = 0; j < items.length; j++) {
        if (columns.hasNext()) {
          ColumnDescription column = columns.next();
          if (column.getCategory() == ColumnDescription.INPUT && node != null) {
            outputGraph.addLabel(node, column.getName(), items[j]);
          } else if (column.getCategory() == ColumnDescription.HEAD) {
            if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
              edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
            }
          } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
            outputGraph.addLabel(edge, column.getName(), items[j]);
          }
        }
      }
    }
    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
    return outputGraph;
  }
 
  /**
   * Converts an array of tokens to a dependency structure
   *
   * @param tokens an array of tokens
   * @param dataFormatFileName the path to the data format file
   * @return a dependency structure
   * @throws MaltChainedException
   */
  public DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException {
    return toDependencyStructure(tokens, readDataFormatSpecification(dataFormatFileName));
  }
 
  /**
   * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string
   *
   * @param tokens an array of tokens to parse
   * @return an array of tokens with a head index and a dependency type at the end of string
   * @throws MaltChainedException
   */
  public String[] parseTokens(String[] tokens) throws MaltChainedException {
    DependencyStructure outputGraph = parse(tokens);
    StringBuilder sb = new StringBuilder();
    String[] outputTokens = new String[tokens.length];
    SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL");
    for (Integer index : outputGraph.getTokenIndices()) {
      sb.setLength(0);
      if (index <= tokens.length) {
        DependencyNode node = outputGraph.getDependencyNode(index);
        sb.append(tokens[index -1]);
        sb.append('\t');
        sb.append(node.getHead().getIndex());
        sb.append('\t');
        if (node.getHeadEdge().hasLabel(deprelTable)) {
          sb.append(node.getHeadEdge().getLabelSymbol(deprelTable));
        } else {
          sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable));
        }
        outputTokens[index-1] = sb.toString();
      }
    }
    return outputTokens;
  }
 
  /**
   * Terminates the parser model.
   *
   * @throws MaltChainedException
   */
  public void terminateParserModel() throws MaltChainedException {
    if (!initialized) {
      throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
    }
    // Runs the postprocess chart items of the "parse" flow chart
    if (flowChartInstance.hasPostProcessChartItems()) {
      flowChartInstance.postprocess();
    }
   
    // Terminate the flow chart with an option container
    engine.terminate(optionContainer);
  }
 
  private void initialize() throws MaltChainedException {
    if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
      return; // OptionManager is already initialized
    }
    OptionManager.instance().loadOptionDescriptionFile();
    OptionManager.instance().generateMaps();
  }
 
 
  /**
   * Returns the option container index
   *
   * @return the option container index
   */
  public int getOptionContainer() {
    return optionContainer;
  }

  private void setOptionContainer(int optionContainer) {
    this.optionContainer = optionContainer;
  }

  /**
   * Returns the path of malt.jar file
   *
   * @return the path of malt.jar file
   */
//  public static String getMaltJarPath() {
//    if (SystemInfo.getMaltJarPath() != null) {
//      return SystemInfo.getMaltJarPath().toString();
//    }
//    return null;
//  }
 
 
}
TOP

Related Classes of org.maltparserx.MaltParserService

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.