Package org.dbpedia.spotlight.spot.cooccurrence.filter

Examples of org.dbpedia.spotlight.spot.cooccurrence.filter.FilterPOS


   */
  public List<SurfaceFormOccurrence> select(List<SurfaceFormOccurrence> surfaceFormOccurrences) {

    List<SurfaceFormOccurrence> selectedOccurrences = new LinkedList<SurfaceFormOccurrence>();

    FilterPOS filterPOS = new FilterPOS();
    FilterTermsize unigramFilter = new FilterTermsize(FilterTermsize.Termsize.unigram);
    FilterPattern filterPattern = new FilterPattern();

    SpotClassifier unigramClassifier = ClassifierFactory.getClassifierInstanceUnigram();
    SpotClassifier ngramClassifier = ClassifierFactory.getClassifierInstanceNGram();

    assert unigramClassifier != null;
    assert ngramClassifier != null;

    //ngramClassifier.setVerboseMode(true);
    //unigramClassifier.setVerboseMode(true);
    List<String> decisions = new LinkedList<String>();

    for(SurfaceFormOccurrence surfaceFormOccurrence : surfaceFormOccurrences) {

            if (surfaceFormOccurrence.surfaceForm().name().trim().length()==0) {
                LOG.warn("I have an occurrence with empty surface form. :-O Ignoring.");
                LOG.error(surfaceFormOccurrence);
                continue;
            }

            if (! (surfaceFormOccurrence.context() instanceof TaggedText)) { //FIXME added this to avoid breaking, but code below will never run if we don't pass the taggedtext
                LOG.error(String.format("SurfaceFormOccurrence did not contain TaggedText. Cannot apply %s",this.getClass()));
       
                selectedOccurrences.add(surfaceFormOccurrence);
                continue;
            }


      if(unigramFilter.applies(surfaceFormOccurrence)) {

        /**
         * Unigram (n = 1)
         */

        if(!filterPOS.applies(surfaceFormOccurrence)) {

          /**
           * The Surface Form is on the POS blacklist, i.e. a single adjective,
           * verb, etc.
           */
 
View Full Code Here


  }

  @Override
  /** {@inheritDoc} */
  public List<SurfaceFormOccurrence> getUnigramCandidates() {
    FilterPOS filterPOS = new FilterPOS();
    List<SurfaceFormOccurrence> surfaceFormOccurrences = new LinkedList<SurfaceFormOccurrence>();

    for(TaggedToken taggedToken : taggedTokens) {

      if(!filterPOS.isOnUnigramBlacklist(taggedToken.getPOSTag())) {
        surfaceFormOccurrences.add(new SurfaceFormOccurrence(new SurfaceForm(taggedToken.getToken()), null, taggedToken.getOffset()));
      }

    }

View Full Code Here

        dataProvider);
   
    instanceBuilder.setVerboseMode(true);

    filters.add(new FilterTermsize(FilterTermsize.Termsize.unigram));
    filters.add(new FilterPOS());
    filters.add(new FilterPattern());

    header = new Instances("UnigramTraining", buildAttributeList(), buildAttributeList().size());

  }
View Full Code Here

TOP

Related Classes of org.dbpedia.spotlight.spot.cooccurrence.filter.FilterPOS

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.