Examples of CoOccurrenceData


Examples of org.dbpedia.spotlight.spot.cooccurrence.features.data.CoOccurrenceData

       */

      if(left1 != null && !leftContext.get(0).getPOSTag().matches(FUNCTION_WORD_PATTERN) && !leftContext.get(0).getPOSTag().contains("$") && !leftContext.get(0).getPOSTag().equals("in")) {

        try {
          CoOccurrenceData leftBigram = dataProvider.getBigramData(left1, candidateData);

          if(leftBigram != null){
          try{
            instance.setValue(i(bigram_left_significance_web, buildAttributeList()), leftBigram.getUnitSignificanceWeb());
          }catch (ArrayIndexOutOfBoundsException ignored) {}
          }

        } catch (ItemNotFoundException ignored) {}


      }


      /**
       * Co-Occurrence data of the left two tokens
       */
      if(left1 != null && left2 != null) {

        try {
          CoOccurrenceData leftTrigram = dataProvider.getTrigramData(left2, left1, candidateData);
          if(!(leftContext.get(0).getPOSTag().equals(",") || leftContext.get(1).getPOSTag().equals(",")) &&
              !(leftContext.get(0).getPOSTag().equals("in") && leftContext.get(1).getPOSTag().equals("at"))
              && leftTrigram.getUnitCountWeb() >= this.trigramLeftWebMin
              )
            instance.setValue(i(trigram_left_count_web, buildAttributeList()), leftTrigram.getUnitCountWeb());
        }
        catch (ArrayIndexOutOfBoundsException ignored) {}
        catch (ItemNotFoundException ignored) {}

      }


      /**
       * Co-Occurrence data of the right two tokens
       */

      if(right1 != null && right2 != null) {

        try{
          CoOccurrenceData rightTrigram = dataProvider.getTrigramData(candidateData, right1, right2);

          if(!(rightContext.get(0).getPOSTag().equals(",") || rightContext.get(1).getPOSTag().equals(","))
            && rightTrigram.getUnitCountWeb() >= this.trigramRightWebMin)
            instance.setValue(i(trigram_right_count_web, buildAttributeList()), rightTrigram.getUnitCountWeb());
        }
        catch (ArrayIndexOutOfBoundsException ignored) { }
        catch (ItemNotFoundException ignored) { }
      }




      /**
       * Co-Occurrence data with term in the middle
       */

      if(left1 != null && right1 != null) {
        try{
          CoOccurrenceData middleTrigram = dataProvider.getTrigramData(left1, candidateData, right1);
          if(!(leftContext.get(0).getPOSTag().equals(",") || rightContext.get(0).getPOSTag().equals(","))
              && !(leftContext.get(0).getPOSTag().equals("in") || rightContext.get(0).getPOSTag().equals("cc"))
              && middleTrigram.getUnitCountWeb() >= this.trigramMiddleWebMin
              )
            instance.setValue(i(trigram_middle_count_web, buildAttributeList()), middleTrigram.getUnitCountWeb());
        }
        catch (ArrayIndexOutOfBoundsException ignored) { }
        catch (ItemNotFoundException ignored) { }
      }



      /**
       * Co-Occurrence data of the right neighbour token:
       */

      if(right1 != null && !rightContext.get(0).getPOSTag().matches(FUNCTION_WORD_PATTERN)) {

        CoOccurrenceData rightBigram = null;
        try {
          rightBigram = dataProvider.getBigramData(candidateData, right1);
        } catch (ItemNotFoundException e) {
          //No right neighbour token found or no data for the token
        }

        if (rightBigram != null) {

          try {
            instance.setValue(i(bigram_right_significance_web, buildAttributeList()), rightBigram.getUnitSignificanceWeb());
          }catch (ArrayIndexOutOfBoundsException ignored) {}

        }
      }
    }
View Full Code Here

Examples of org.dbpedia.spotlight.spot.cooccurrence.features.data.CoOccurrenceData

    if(termSize == 2) {

      try {
        if(firstTaggedTokenData != null && secondTaggedTokenData != null) {
          CoOccurrenceData bigramData = dataProvider.getBigramData(firstTaggedTokenData, secondTaggedTokenData);

          //if (bigramData.getUnitCountWeb() > bigramLeftWebMin)
            instance.setValue(i(count_web, buildAttributeList()), bigramData.getUnitCountWeb());
        }
      } catch (ItemNotFoundException ignored) {}
      catch (ArrayIndexOutOfBoundsException ignored) {}



    }



    List<String> verbs = new LinkedList<String>();

    boolean allLowercase = surfaceFormOccurrence.surfaceForm().name().toLowerCase().equals(surfaceFormOccurrence.surfaceForm().name());
    boolean allUppercase = surfaceFormOccurrence.surfaceForm().name().toUpperCase().equals(surfaceFormOccurrence.surfaceForm().name());

    int capitalizedWords = 0;

    for(TaggedToken candidateToken : candidateTokens) {
      if(candidateToken.getPOSTag().startsWith("v") || candidateToken.getPOSTag().equals("be")) {
        verbs.add(candidateToken.getPOSTag());
      }

      if(Character.isUpperCase(candidateToken.getToken().charAt(0)))
        capitalizedWords++;
    }

    try{
      if(verbs.size() > 1)
        instance.setValue(i(contains_verb, buildAttributeList()), 5);
      else if(verbs.size()==0)
        instance.setValue(i(contains_verb, buildAttributeList()), 0);
      else if(verbs.get(0).equals("vb"))
        instance.setValue(i(contains_verb, buildAttributeList()), 1);
      else if(verbs.get(0).equals("vbd"))
        instance.setValue(i(contains_verb, buildAttributeList()), 2);
      else if(verbs.get(0).equals("vbg"))
        instance.setValue(i(contains_verb, buildAttributeList()), 3);
      else if(verbs.get(0).equals("vbn"))
        instance.setValue(i(contains_verb, buildAttributeList()), 4);
      else if(verbs.get(0).equals("be"))
        instance.setValue(i(contains_verb, buildAttributeList()), 5);
    } catch (ArrayIndexOutOfBoundsException ignored) {}

    try{
      if(allLowercase)
        instance.setValue(i(term_case, buildAttributeList()), 0);
      else if(allUppercase)
        instance.setValue(i(term_case, buildAttributeList()), 3);
      else if(capitalizedWords == candidateTokens.size())
        instance.setValue(i(term_case, buildAttributeList()), 2);
      else if(capitalizedWords == 1 && Character.isUpperCase(candidateTokens.get(0).getToken().charAt(0)))
        instance.setValue(i(term_case, buildAttributeList()), 4);
      else
        instance.setValue(i(term_case, buildAttributeList()), 1);


    } catch (ArrayIndexOutOfBoundsException ignored) {}


    try{
      instance.setValue(i(candidate_size, buildAttributeList()), termSize);
    } catch (ArrayIndexOutOfBoundsException ignored) {}

    try {
      TaggedToken leftNeighbourToken = text.taggedTokenProvider().getLeftNeighbourToken(surfaceFormOccurrence);

      if(leftNeighbourToken.getPOSTag().equals("to")) {
        instance.setValue(i(pre_pos, buildAttributeList()), 0);
      }
      else if(leftNeighbourToken.getPOSTag().matches("[mnf].*")) {
        instance.setValue(i(pre_pos, buildAttributeList()), 1);
      }else if(leftNeighbourToken.getToken().matches("[aA][nN]?")) {
        instance.setValue(i(pre_pos, buildAttributeList()), 2);
      }

    } catch (ItemNotFoundException ignored) {

    } catch (ArrayIndexOutOfBoundsException ignored) {}


    try {

      if(leftContext.size() > 0) {

        if(leftContext.get(0).getPOSTag().equals("to")) {
          instance.setValue(i(pre_pos, buildAttributeList()), 0);
        }
        else if(leftContext.get(0).getPOSTag().matches("[mnf].*")) {
          instance.setValue(i(pre_pos, buildAttributeList()), 1);
        }else if(leftContext.get(0).getToken().matches("[aA][nN]?")) {
          instance.setValue(i(pre_pos, buildAttributeList()), 2);
        }
      }

    } catch (ArrayIndexOutOfBoundsException ignored) {}

    try{
      if (CandidateFeatures.quoted(surfaceFormOccurrence) == 1)
        instance.setValue(i(quoted, buildAttributeList()), 0);

    } catch (ArrayIndexOutOfBoundsException ignored) {}




    try {
      if(rightContext.size() > 0) {

        if(rightContext.get(0).getToken().equals("of")) {
          instance.setValue(i(next_pos, buildAttributeList()), 0);
        }else if(rightContext.get(0).getToken().equals("to")) {
          instance.setValue(i(next_pos, buildAttributeList()), 1);
        }else if(rightContext.get(0).getPOSTag().startsWith("be")) {
          instance.setValue(i(next_pos, buildAttributeList()), 2);
        }else if(rightContext.get(0).getPOSTag().startsWith("v")) {
          instance.setValue(i(next_pos, buildAttributeList()), 3);
        }
      }
    } catch (ArrayIndexOutOfBoundsException ignored) {}



    try {
      TaggedToken lastToken = candidateTokens.get(candidateTokens.size() - 1);


      if(lastToken.getPOSTag().equals("in")) {
        instance.setValue(i(ends_with, buildAttributeList()), 0);
      }
    } catch (ArrayIndexOutOfBoundsException ignored) {}



    /**
     * Co-Occurrence data of the left neighbour token:
     */


    if(left1 != null && firstTaggedTokenData != null && leftContext.size() > 0 && !leftContext.get(0).getPOSTag().matches(FUNCTION_WORD_PATTERN) && !leftContext.get(0).getPOSTag().equals("in")) {

      CoOccurrenceData bigramLeft = null;
      try {
        bigramLeft = dataProvider.getBigramData(left1, firstTaggedTokenData);
      } catch (ItemNotFoundException ignored) {}

      if(bigramLeft != null && bigramLeft.getUnitSignificanceWeb() > bigramLeftWebMin) {

        try{
          instance.setValue(i(bigram_left_significance_web, buildAttributeList()), bigramLeft.getUnitSignificanceWeb());
        } catch (ArrayIndexOutOfBoundsException ignored) {}

      }
    }

    /**
     * Co-Occurrence data for the left trigram
     */

    if(firstTaggedTokenData != null && secondTaggedTokenData != null && left1 != null) {

      CoOccurrenceData trigramLeft = null;
      try {
        trigramLeft = dataProvider.getTrigramData(left1, firstTaggedTokenData, secondTaggedTokenData);
      } catch (ItemNotFoundException ignored) {}

      if(trigramLeft != null && trigramLeft.getUnitCountWeb() > trigramLeftWebMin) {

        try{
          instance.setValue(i(trigram_left, buildAttributeList()), trigramLeft.getUnitCountWeb());
        } catch (ArrayIndexOutOfBoundsException ignored) {}

      }
    }


    if(lastTaggedTokenData != null && lastBut1TaggedTokenData != null && right1 != null) {

      CoOccurrenceData trigramRight = null;
      try {
        trigramRight = dataProvider.getTrigramData(lastBut1TaggedTokenData, lastTaggedTokenData, right1);
      } catch (ItemNotFoundException ignored) {}
      catch(NullPointerException ignored) {}

      if(trigramRight != null && trigramRight.getUnitCountWeb() > trigramRightWebMin) {

        try{
          instance.setValue(i(trigram_right, buildAttributeList()), trigramRight.getUnitCountWeb());
        } catch (ArrayIndexOutOfBoundsException ignored) {}

      }
    }



    /**
     * Co-Occurrence data of the right neighbour token:
     */

    if(lastTaggedTokenData != null && right1 != null && !rightContext.get(0).getPOSTag().matches(FUNCTION_WORD_PATTERN) && !rightContext.get(0).getPOSTag().equals("in")) {
      CoOccurrenceData bigramRight = null;
      try {
        bigramRight = dataProvider.getBigramData(lastTaggedTokenData, right1);
      } catch (ItemNotFoundException ignored) {}

      if(bigramRight != null && bigramRight.getUnitSignificanceWeb() > bigramRightWebMin) {

        try {
          instance.setValue(i(bigram_right_significance_web, buildAttributeList()), bigramRight.getUnitSignificanceWeb());
        } catch (ArrayIndexOutOfBoundsException ignored) {}

      }
    }

View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.