Package net.bpiwowar.mg4j.extensions.utils

Source Code of net.bpiwowar.mg4j.extensions.utils.TermUtil

/**
* $Author:$
* $Id:$
* $Rev:$
*/

package net.bpiwowar.mg4j.extensions.utils;

import it.unimi.di.big.mg4j.index.TermProcessor;
import it.unimi.dsi.lang.MutableString;
import net.bpiwowar.mg4j.extensions.conf.IndexConfiguration;
import net.bpiwowar.mg4j.extensions.query.*;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.log4j.Logger;

import java.util.Map;

/**
* Code previously found in MG4JScorer but needed elsewhere as well
*
* @author <a href="mailto:benjamin@bpiwowar.net">Benjamin Piwowarski</a>
*
*/
public class TermUtil {
  final static Logger logger = Logger.getLogger(TermUtil.class);
 
  static public void getPositiveTerms(
            Query query,
      Map<String, MutableInt> terms, TermProcessor processor,
      IndexConfiguration index) {
    if (query instanceof COQuery)
      getPositiveTerms((COQuery) query, terms, processor, index);
//    else if (query instanceof TopicOperator)
//      getPositiveTerms((TopicOperator) query, terms, processor, index);
    else if (query instanceof StringQuery)
      getPositiveTerms((StringQuery) query, terms, processor, index);
    else
      throw new NotImplementedException(String.format(
          "Cannot handle query of class %s", query.getClass()));
  }

  static public void getPositiveTerms(StringQuery query,
      Map<String, MutableInt> termMap, TermProcessor processor,
      IndexConfiguration index) {
    // Transform into a CO query
    COQuery coQuery = new COQuery();
    final Requirement req = new Requirement(new SimpleQuery(query.getQuery()));
    coQuery.add(req);
   
    // ... and then perform!
    getPositiveTerms(coQuery, termMap, processor, index);
  }

  static public void getPositiveTerms(COQuery coQuery,
      Map<String, MutableInt> terms, TermProcessor processor,
      IndexConfiguration index) {
    for (Requirement req : coQuery.requirements)
      getPositiveTerms(req, terms, processor, index);
  }

  static private void getPositiveTerms(Requirement req,
      Map<String, MutableInt> terms, TermProcessor processor,
      IndexConfiguration index) {
    for (Text text : req.terms)
      getPositiveTerms(text, terms, processor, index);
  }

  /**
   * Get query terms which are not negative
   *
   * @param text
   * @param terms
   */
  static private void getPositiveTerms(Text text,
      Map<String, MutableInt> terms, TermProcessor processor,
      IndexConfiguration index) {
    // Skip negative terms
    if (text.isNegative())
      return;

    // Add the terms from the query
    if (text instanceof Term) {
      addTerm(((Term) text).word, terms, processor, index);
    } else if (text instanceof Phrase) {
      for (Term term : ((Phrase) text).terms)
        addTerm(term.word, terms, processor, index);
    } else
      throw new RuntimeException(String.format("Unknown text type: %s", text.getClass()));
  }

//  static public void getPositiveTerms(TopicOperator operator,
//      final Map<String, MutableInt> terms, final TermProcessor processor,
//      final IndexConfiguration index) {
//    operator.apply(new OperatorTransformer() {
//      @Override
//      public Operator transform(Operator parent, Operator operator) {
//        if (operator instanceof Projection
//            && ((Projection) operator).isOrthogonal()) {
//          ((Projection) operator).getOperator().applyToSuboperators(
//              this);
//        } else if (operator instanceof TermOperator) {
//          addTerm(((TermOperator) operator).getTerm(), terms,
//              processor, index);
//        } else {
//          operator.applyToSuboperators(this);
//        }
//        return operator;
//      }
//    });
//  }
//
  /**
   * Add a term if present in the index Try to split a word containing dash
   * "-" if not present
   *
   * @param text
   * @param terms
   */
  static private void addTerm(String text, Map<String, MutableInt> terms,
      TermProcessor processor, IndexConfiguration index) {
    MutableString word = new MutableString(text);
    // TODO: Annalina - when the query transformer is implemented, remove
    // the following and just keep the "update" line
    if (processor.processTerm(word)) {
      if (index.getTermId(word) != -1) {
        update(terms, word.toString());
        return;
      }
    }

    String[] array = text.split("-");
    if (array.length > 1) {
      logger
          .info(String.format(
              "Split the dash separted word %s since it is not in the index",
              text));
      for (String s : array)
        addTerm(s, terms, processor, index);
    }
  }


  // --- Get sets of terms

  static public void update(Map<String, MutableInt> terms, String word) {
    MutableInt v = terms.get(word);
    if (v == null)
      v = terms.put(word, new MutableInt(1));
    else
      v.add(1);

  }
}
TOP

Related Classes of net.bpiwowar.mg4j.extensions.utils.TermUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.