Source Code of com.tamingtext.qa.QuestionQParserPlugin

/*
 * Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 * -------------------
 * To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
 * http://www.manning.com/ingersoll
 */


package com.tamingtext.qa;




import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;


import opennlp.maxent.io.SuffixSensitiveGISModelReader;
import opennlp.model.MaxentModel;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.parser.Parser;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;


import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;


/**
 *
 *
 **/
public class QuestionQParserPlugin extends QParserPlugin {


  private Map<String, String> answerTypeMap;
  protected MaxentModel model;
  protected double[] probs;
  protected AnswerTypeContextGenerator atcg;
  private POSTaggerME tagger;
  private ChunkerME chunker;


  //<start id="qqpp.create"/>
  @Override
  public QParser createParser(String qStr, SolrParams localParams, SolrParams params,
                              SolrQueryRequest req) {
    answerTypeMap = new HashMap<String, String>();//<co id="qqpp.atm"/>
    answerTypeMap.put("L", "NE_LOCATION");
    answerTypeMap.put("T", "NE_TIME|NE_DATE");
    answerTypeMap.put("P", "NE_PERSON");
    answerTypeMap.put("O", "NE_ORGANIZATION");
    QParser qParser;
    if (params.getBool(QAParams.COMPONENT_NAME, false) == true //<co id="qqpp.explainif"/>
            && qStr.equals("*:*") == false) {
      AnswerTypeClassifier atc =
              new AnswerTypeClassifier(model, probs, atcg);//<co id="qqpp.atc"/>
      Parser parser = new ChunkParser(chunker, tagger);//<co id="qqpp.parser"/>
      qParser = new QuestionQParser(qStr, localParams, //<co id="qqpp.construct"/>
              params, req, parser, atc, answerTypeMap);
    } else {
      //just do a regular query if qa is turned off
      qParser = req.getCore().getQueryPlugin("edismax")
              .createParser(qStr, localParams, params, req);
    }
    return qParser;
  }
  /*
  <calloutlist>
      <callout arearefs="qqpp.atm"><para>Construct a map of the answer types that we are interested in handling, for instance locations, people and times and dates.</para></callout>
      <callout arearefs="qqpp.explainif"><para>We use this if clause to create an regular Solr query parser in the cases where the user hasn't entered a question or the enter the *:* query (<classname>MatchAllDocsQuery</classname>.</para></callout>
      <callout arearefs="qqpp.atc"><para>The <classname>AnswerTypeClassifier</classname> uses the trained Answer Type model (located in the models directory) to classify the question.</para></callout>
      <callout arearefs="qqpp.parser"><para>Construct the chunker (parser) that will be responsible for parsing the user question.</para></callout>
      <callout arearefs="qqpp.construct"><para>Create the <classname>QuestionQParser</classname> by passing in the user's question as well as the pre-initialized resources from the init method.</para></callout>
  </calloutlist>
  */
  //<end id="qqpp.create"/>


  //<start id="qqpp.init"/>
  public void init(NamedList initArgs) {
    SolrParams params = SolrParams.toSolrParams(initArgs);
    String modelDirectory = params.get("modelDirectory",
            System.getProperty("model.dir"));//<co id="qqpp.model"/>
    String wordnetDirectory = params.get("wordnetDirectory",
            System.getProperty("wordnet.dir"));//<co id="qqpp.wordnet"/>
    if (modelDirectory != null) {
      File modelsDir = new File(modelDirectory);
      try {
        InputStream chunkerStream = new FileInputStream(
            new File(modelsDir,"en-chunker.bin"));
        ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
        chunker = new ChunkerME(chunkerModel); //<co id="qqpp.chunker"/>
        InputStream posStream = new FileInputStream(
            new File(modelsDir,"en-pos-maxent.bin"));
        POSModel posModel = new POSModel(posStream);
        tagger =  new POSTaggerME(posModel); //<co id="qqpp.tagger"/>
        model = new DoccatModel(new FileInputStream( //<co id="qqpp.theModel"/>
            new File(modelDirectory,"en-answer.bin")))
            .getChunkerModel();
        probs = new double[model.getNumOutcomes()];
        atcg = new AnswerTypeContextGenerator(
                new File(wordnetDirectory, "dict"));//<co id="qqpp.context"/>
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }
  /*
  <calloutlist>
      <callout arearefs="qqpp.model"><para>The model directory contains all of the OpenNLP models that we use throughout the book.</para></callout>
      <callout arearefs="qqpp.wordnet"><para>WordNet is a lexical resource used to assist in the Answer Type identification process.</para></callout>
      <callout arearefs="qqpp.chunker"><para>The Treebank Chunker works with a Parser to do shallow parsing of questions</para></callout>
      <callout arearefs="qqpp.tagger"><para>The tagger is responsible for Part of Speech Tagging</para></callout>
      <callout arearefs="qqpp.theModel"><para>Create the actual model and save it for reuse, as it is thread safe, but the containing class is not.</para></callout>
      <callout arearefs="qqpp.context"><para>Create the AnswerTypeContextGenerator, which is responsible for feature selection.</para></callout>


  </calloutlist>
  */
  //<end id="qqpp.init"/>
}
Source Code of com.tamingtext.qa.QuestionQParserPlugin

Related Classes of com.tamingtext.qa.QuestionQParserPlugin