Package domain

Source Code of domain.Text

/**
*
* @author marc.molins.piulachs
*/

package domain;


import java.util.StringTokenizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;


public class Text {

  /** Characters that cleanString drops from the raw text before it is tokenized. */
  private final static String PUNCTUATION_MARKS = "?!.;,:()[]{}<>\"\'—¿¡«»*";

  /** Raw text as handed to enterText / enterTextOnly; null until a text is entered. */
  private String _text;
  /** Lower-cased copy of the text with PUNCTUATION_MARKS removed; produced by enterText. */
  private String _cleanText;
  /** Number of whitespace-separated tokens counted by recogniseVerbs. */
  private int _totalWords;
  /**
   * One entry per recognised verb:
   * ((word as it appears in the clean text, infinitive), (1-based word position, visited flag)).
   */
  private ArrayList< Pair<Pair<String,String>,Pair<Integer,Boolean>> > _verbsInfinitivesPositionVisited;
  /** 1-based word positions of the recognised verbs, in the same order as the list above. */
  private ArrayList< Integer > _positions;
  /** (noun, probability) candidates for the currently selected verb; rebuilt by nominalization(). */
  private ArrayList< Pair<String,Float> >  _currentNounsAndProb;
  /** Index into _verbsInfinitivesPositionVisited of the currently selected verb. */
  private int _currentNominalization;
  private int _totalVerbs; // == _verbsInfinitivesPositionVisited.size()
  /** True once startNominalization() has run for the current text. */
  private Boolean _startedNominalization;
  /** Dictionary used to map a word to its infinitive (null result means "not a verb"). */
  private VerbDictionary _verbDict;
  /** Dictionary queried with an infinitive to obtain noun candidates. */
  private TestDictionary _nounDict;
  /** Rules counted as learnt for the current text (incremented in nominalization()). */
  private int _learntRules;
  /** Pairs counted as learnt for the current text; reset to 0 whenever a new text is entered. */
  private int _learntPairs;
  /** Infinitive that occurs most often in the current text, or "-" when none was found. */
  private String _mostCommonVerb;
    /** Statistics object obtained from UserStatistics.getInstance(). */
    private UserStatistics _uStats;
    /** Statistics object obtained from TextStatistics.getInstance(). */
    private TextStatistics _tStats;
  // NOTE(review): _auxText is not read or written anywhere in this class as shown.
  private String _auxText;
 

  public Text(VerbDictionary verbDict, TestDictionary nounDict) {
    _text = null;
    _cleanText = null;
    _totalWords = 0;
    _currentNominalization = 0;
    _totalVerbs = 0;
    _startedNominalization = false;
    _verbDict = verbDict;
    _nounDict = nounDict;
    _positions = new ArrayList<Integer>();
    _verbsInfinitivesPositionVisited = new ArrayList< Pair<Pair<String,String>,Pair<Integer,Boolean>> >();
    _currentNounsAndProb = new ArrayList< Pair<String,Float> >();
    _learntRules = 0;
    _learntPairs = 0;
    _mostCommonVerb = "-";
    _uStats = UserStatistics.getInstance();
    _tStats = TextStatistics.getInstance();
    _tStats.setTotalWords(0);
    _tStats.setMarkedWords(0);
    _tStats.setLearntPairs(0);
    _tStats.setLearntRules(0);
    _tStats.setMostCommonVerb("-");

  }



  private Boolean isAuxVerb(String verb) {
   
    ArrayList<String> auxVerbsList = new ArrayList<String>(Arrays.asList("have",
    "has", "had", "shall", "will", "should", "would", "am", "are", "is",
    "be", "been", "was"  , "were", "let", "do", "does", "did", "can"
    , "could", "may", "might", "ought", "must", "doing", "having"

    /*"haven't", "hasn't", "hadn't", "shalln't", "shan't", "won't", "shouldn't",
    "wouldn't", "ain't", "aren't", "isn't", "wasn't", "weren't", "let's",
    "don't", "doesn't", "didn't", "can't", "couldn't", "mayn't", "mightn't"
    , "oughtn't", "mustn't"*/));

    Iterator i = auxVerbsList.iterator();
    Boolean found = false;
    while(!found && i.hasNext()) {
      found = verb.equalsIgnoreCase((String) i.next());
    }
                return found;
   
  }



  private void recogniseVerbs(String text) {
   
    StringTokenizer tokens = new StringTokenizer(text);
    String word1, word2, infinitive, infinitive2;
    Boolean moreThanOne = false;

    if (tokens.hasMoreTokens()) {
      word1 = tokens.nextToken();
      ++_totalWords;

      while (tokens.hasMoreTokens()) {
        word2 = tokens.nextToken();
        ++_totalWords;
        moreThanOne = true;

        infinitive = _verbDict.getInfinitive(word1);
        if (infinitive != null) {
          if (!isAuxVerb(word1)) {
            Pair<String,String> pair1 = new Pair<String,String>();
            Pair<Integer,Boolean> pair2 = new Pair<Integer,Boolean>();
            Pair<Pair<String,String>,Pair<Integer,Boolean>> infAndPos = new Pair<Pair<String,String>,Pair<Integer,Boolean>>();
            pair1.first = word1;
            pair1.second = infinitive;
            pair2.first = _totalWords - 1;
            pair2.second = false;
            infAndPos.first = pair1;
            infAndPos.second = pair2;
            _verbsInfinitivesPositionVisited.add(infAndPos);
            _positions.add(_totalWords - 1);
            ++_totalVerbs;
          }
          else {
            infinitive2 = _verbDict.getInfinitive(word2);
            if (infinitive2 == null) {
              Pair<String,String> pair1 = new Pair<String,String>();
              Pair<Integer,Boolean> pair2 = new Pair<Integer,Boolean>();
              Pair<Pair<String,String>,Pair<Integer,Boolean>> infAndPos = new Pair<Pair<String,String>,Pair<Integer,Boolean>>();
              pair1.first = word1;
              pair1.second = infinitive;
              pair2.first = _totalWords - 1;
              pair2.second = false;
              infAndPos.first = pair1;
              infAndPos.second = pair2;
              _verbsInfinitivesPositionVisited.add(infAndPos);
              _positions.add(_totalWords - 1);
              ++_totalVerbs;
            }
          }
        }
        word1 = word2;
      }

      if (moreThanOne) {
        infinitive = _verbDict.getInfinitive(word1);
        if (infinitive != null) {
          Pair<String,String> pair1 = new Pair<String,String>();
          Pair<Integer,Boolean> pair2 = new Pair<Integer,Boolean>();
          Pair<Pair<String,String>,Pair<Integer,Boolean>> infAndPos = new Pair<Pair<String,String>,Pair<Integer,Boolean>>();
          pair1.first = word1;
          pair1.second = infinitive;
          pair2.first = _totalWords;
          pair2.second = false;
          infAndPos.first = pair1;
          infAndPos.second = pair2;
          _verbsInfinitivesPositionVisited.add(infAndPos);
          _positions.add(_totalWords);
          ++_totalVerbs;
        }
      }

      else if(_totalWords == 1) {
        infinitive = _verbDict.getInfinitive(word1);
        if (infinitive != null) {
          Pair<String,String> pair1 = new Pair<String,String>();
          Pair<Integer,Boolean> pair2 = new Pair<Integer,Boolean>();
          Pair<Pair<String,String>,Pair<Integer,Boolean>> infAndPos = new Pair<Pair<String,String>,Pair<Integer,Boolean>>();
          pair1.first = word1;
          pair1.second = infinitive;
          pair2.first = 1;
          pair2.second = false;
          infAndPos.first = pair1;
          infAndPos.second = pair2;
          _verbsInfinitivesPositionVisited.add(infAndPos);
          _positions.add(1);
          ++_totalVerbs;
        }
      }
    }
  }



  private void nominalization() {

    _currentNounsAndProb = new ArrayList< Pair<String,Float> >();
   
    ArrayList<Three<String, Float, ArrayList<Pair<String, String>>>> aux
        = new ArrayList<Three<String, Float, ArrayList<Pair<String, String>>>>();

    aux = _nounDict.getCorrectList(_verbsInfinitivesPositionVisited.get(_currentNominalization).first.second);

    String verb = _verbsInfinitivesPositionVisited.get(_currentNominalization).first.first;
    String infinitive = _verbsInfinitivesPositionVisited.get(_currentNominalization).first.second;
    int position = _verbsInfinitivesPositionVisited.get(_currentNominalization).second.first;
    Boolean visited = _verbsInfinitivesPositionVisited.get(_currentNominalization).second.second;

    Boolean empty = true;

//    System.out.println("\nVERB: " + verb);
//    System.out.println("INFINITIVE: " + infinitive);
//    System.out.println("WORD NUMBER: " + position + "\n");
//    System.out.println("________________________");
   
    Iterator i = aux.iterator();
    while(i.hasNext()) {
      empty = false;
      Three<String, Float, ArrayList<Pair<String, String>>> three = (Three<String, Float, ArrayList<Pair<String, String>>>) i.next();
      Pair<String,Float> p = new Pair<String,Float>();

      p.first = three.correctWord;
      p.second = three.probability;
      _currentNounsAndProb.add(p);

//      System.out.println("\nnoun: " + three.correctWord);
//      System.out.println("probability: " + three.probability);
//      System.out.println("________________________");

      if (!visited) {
        Iterator j = three.rules.iterator();
        Pair<Pair<String,String>,Pair<Integer,Boolean>> pairpair = _verbsInfinitivesPositionVisited.get(_currentNominalization);
        pairpair.second.second = true;
        _verbsInfinitivesPositionVisited.set(_currentNominalization, pairpair);

//        Pair<String,String> p2 = new Pair<String,String>();
//        p2.first = three.correctWord;
//        p2.second = infinitive;
//        //p2.second = verb;
//        _newTraining.add(p2);

        boolean first = true;
        while (j.hasNext()) {
          Pair<String,String> pair = (Pair<String,String>) j.next();
          //_newTraining.add(pair);
          if (!first) {
            ++_learntRules;
            _uStats.increaseLearntRules(1);
            _tStats.increaseLearntRules(1);
          }
          first = false;
        }
      }
    }
    if (empty) {
//      System.out.println("No nominalizations");
//      System.out.println("________________________");
    }

  }



  public Pair<String,String> favoriteNominalization(int pos) {
    Pair<String,String> p = new Pair<String,String>();
    p.first = _currentNounsAndProb.get(pos).first;
    p.second = _verbsInfinitivesPositionVisited.get(_currentNominalization).first.second;
    //p.second = verb;
    _uStats.increaseLearntPairs(1);
    _tStats.increaseLearntPairs(1);
    return p;

  }



  private Boolean isPunctuationMark(char c) {
        return PUNCTUATION_MARKS.indexOf(c) != -1;
    }



  private String cleanString(String s) {

        StringBuilder temp = new StringBuilder(s.length());

        for (int i = 0; i < s.length(); ++i) {

      if (!isPunctuationMark(s.charAt(i))) {
                temp.append(Character.toLowerCase(s.charAt(i)));
            }

        }
        return temp.toString();
    }



  public void nextNominalization() {

    if(_startedNominalization) {
      if (_totalVerbs > 0) {

        if (_currentNominalization == _totalVerbs - 1) {
          _currentNominalization = 0;
        }
        else {
           ++_currentNominalization;
        }
        nominalization();

      }

      else {
//        System.out.println("No verbs");
      }
    }
    else {
//      System.out.println("Must start nominalization");
    }


  }



  public void previousNominalization() {

    if(_startedNominalization) {
      if (_totalVerbs > 0) {

        if (_currentNominalization == 0) {
          _currentNominalization = _totalVerbs - 1;
        }
        else {
           --_currentNominalization;
        }
        nominalization();
       
      }
      else {
//        System.out.println("No verbs");
      }
    }
    else {
//      System.out.println("Must start nominalization");
    }


  }



  public void clickVerb(int posText) {

    if(_startedNominalization) {
     
      Iterator i = _positions.iterator();
      int counter = 0;
      Boolean passed = false;

      while(i.hasNext() && !passed) {
        Integer posVect = (Integer) i.next();
        if (posVect == posText) {
          _currentNominalization = counter;
          nominalization();
          passed = true;
        }
        ++counter;
  //      else if (posVect > posText) {
  //        passed = true;
  //      }
      }
     
    }
    else {
//      System.out.println("Must start nominalization");
    }


  }



  public void startNominalization() {
    if (_startedNominalization == true) {
//      System.out.println("Nominalization already begun");
    }
    else {
      _tStats.setLearntRules(0);
      _tStats.setLearntPairs(0);
      if (_text != null) {
        _startedNominalization = true;
        recogniseVerbs(_cleanText);
       
        _uStats.increaseTotalWords(_totalWords);
        _tStats.setTotalWords(_totalWords);
        _uStats.increaseMarkedWords(_totalVerbs);
        _tStats.setMarkedWords(_totalVerbs);
       
       
       
//        System.out.print(_totalVerbs + " verb/s detected: ");
        HashMap map = new HashMap();
       

        Iterator i = _verbsInfinitivesPositionVisited.iterator();
        while(i.hasNext()) {
          Pair<Pair<String,String>,Pair<Integer,Boolean>> pair = new Pair<Pair<String,String>,Pair<Integer,Boolean>>();
          pair = (Pair<Pair<String,String>,Pair<Integer,Boolean>>) i.next();
//          System.out.print(pair.first.first + " ");
          Integer n = (Integer) map.get(pair.first.second);
          if (n == null) {
            n = 0;
          }
          map.put(pair.first.second, new Integer(n + 1));

         
        }
//        System.out.print("\n");

        HashMap mapAux = new LinkedHashMap();
        List mapKeys = new ArrayList(map.keySet());
        List mapValues = new ArrayList(map.values());
        TreeSet sortedSet = new TreeSet(mapValues);
        Object[] sortedArray = sortedSet.toArray();
        int size = sortedArray.length;

        for (int j=0; j<size; j++) {
           mapAux.put(mapKeys.get(mapValues.indexOf(sortedArray[j])), sortedArray[j]);
        }

        Set ref = mapAux.keySet();
        Iterator it = ref.iterator();

        String mostCommon = "-";

        while (it.hasNext()) {
           mostCommon = (String)it.next();
        }

        _mostCommonVerb = mostCommon;
         
        _tStats.setMostCommonVerb(_mostCommonVerb);

        if (_totalWords > 0) {
          _uStats.increaseNominalizedTexts(1);
        }
       
        if (_totalVerbs > 0) {

          nominalization();

        }
      }
      else {
//        System.out.println("No text");
      }
    }


  }



  public void enterText(String text) {

    _text = null;
    _cleanText = null;
    _totalWords = 0;
    _currentNominalization = 0;
    _totalVerbs = 0;
    _positions = new ArrayList<Integer>();
    _startedNominalization = false;
    _verbsInfinitivesPositionVisited = new ArrayList< Pair<Pair<String,String>,Pair<Integer,Boolean>> >();
    _currentNounsAndProb = new ArrayList< Pair<String,Float> >();
    _learntRules = 0;
    _learntPairs = 0;
    _mostCommonVerb = "-";
    _tStats.setTotalWords(0);
    _tStats.setMarkedWords(0);
    _tStats.setLearntPairs(0);
    _tStats.setLearntRules(0);
    _tStats.setMostCommonVerb("-");
   
    _text = text;
    //System.out.println(text);

    _cleanText = cleanString(text);
    //System.out.println(_cleanText);

  }



 

  public int getTotalWords(){
    return _totalWords;
  }

  public int getTotalVerbs() {
    return _totalVerbs;
  }

  public String getText() {
    return _text;
  }

  public String getCleanText() {
    return _cleanText;
  }

  public ArrayList< Pair<Pair<String,String>,Pair<Integer,Boolean>> > getVerbsInfinitivesPositionVisited() {
    return _verbsInfinitivesPositionVisited;
  }

  public  ArrayList< Pair<String,Float> > getCurrentNounsAndProbability() {
    return _currentNounsAndProb;
  }


  public int getCurrentNominalizationPosition() {
    return _positions.get(_currentNominalization);
  }

  public Boolean getStartedNominalization() {
    return _startedNominalization;
  }

  public int getLearntRules() {
    return _learntRules;
  }

  public int getLearntPairs() {
    return _learntPairs;
  }

  public String getMostCommonVerb() {
    return _mostCommonVerb;
  }

  public ArrayList<Integer> getPositions() {
    return _positions;
  }

  public void enterTextOnly(String txt) {
    _text = txt;
  }
//  VerbDictionary _verbDict;  TODO: do these need getters and setters?
//  TestDictionary _nounDict;

}
TOP

Related Classes of domain.Text

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact: coftware#gmail.com.