Package org.dbpedia.spotlight.spot.cooccurrence.classification

Examples of org.dbpedia.spotlight.spot.cooccurrence.classification.SpotClassifier


        // Delegate to the constructor that takes only the spotter configuration.
        this(spotterConfiguration);

        //TODO Instead of doing a test classification here, we should properly check if the serialized model suits the WEKA instances that are produced from SurfaceFormOccurrences.
        LOG.info("Testing classifiers for co-occurrence based spot selector.");
    // Fetch both classifiers from the factory so that each one is exercised once below.
    SpotClassifier unigramClassifier = ClassifierFactory.getClassifierInstanceUnigram();
    SpotClassifier ngramClassifier = ClassifierFactory.getClassifierInstanceNGram();

        // Fixed sample sentence that provides one multi-word spot ("Bill Gates") and one
        // single-word spot ("Berlin") for the smoke test.
        Text taggedText = new TaggedText("Bill Gates is a software developer from Berlin.", taggedTokenProvider);

        // "Bill Gates" starts at index 0 of the sample sentence.
        SurfaceFormOccurrence ngramOccurrence = new SurfaceFormOccurrence(new SurfaceForm("Bill Gates"),
                        taggedText, 0, Provenance.Undefined(), -1);

        // NOTE(review): if the third argument is a zero-based character offset into the text,
        // "Berlin" starts at index 40 of the sample sentence, not 41 -- confirm against the
        // SurfaceFormOccurrence constructor and adjust if necessary.
        SurfaceFormOccurrence unigramOccurrence = new SurfaceFormOccurrence(new SurfaceForm("Berlin"),
                        taggedText, 41, Provenance.Undefined(), -1);

        try {
            // The classification results are discarded; only a thrown exception matters here.
            unigramClassifier.classify(unigramOccurrence);
            ngramClassifier.classify(ngramOccurrence);
        } catch (Exception e) {
            // Any failure is reported as an initialization problem; the original exception is
            // kept as the cause.
            throw new InitializationException("An error occurred while classifying a test spot using the co-occurrence " +
                    "based spot selector. This is most probably caused by an outdated spot selector model. Please " +
                    "check the spot selector models defined 'org.dbpedia.spotlight.spot.cooccurrence.classifier.*'.", e);
        }
View Full Code Here


    // Decides for every candidate occurrence whether it is kept in selectedOccurrences.
    // Single-token candidates pass through the POS filter, the pattern filter and the unigram
    // classifier; all other candidates are handed to the n-gram classifier. The reason for each
    // recorded drop is appended to `decisions`.
    FilterPOS filterPOS = new FilterPOS();
    FilterTermsize unigramFilter = new FilterTermsize(FilterTermsize.Termsize.unigram);
    FilterPattern filterPattern = new FilterPattern();

    SpotClassifier unigramClassifier = ClassifierFactory.getClassifierInstanceUnigram();
    SpotClassifier ngramClassifier = ClassifierFactory.getClassifierInstanceNGram();

    // These checks only run when the JVM is started with assertions enabled (-ea).
    assert unigramClassifier != null;
    assert ngramClassifier != null;

    //ngramClassifier.setVerboseMode(true);
    //unigramClassifier.setVerboseMode(true);
    // Human-readable reason for each candidate that is explicitly dropped below.
    List<String> decisions = new LinkedList<String>();

    for(SurfaceFormOccurrence surfaceFormOccurrence : surfaceFormOccurrences) {

            // Skip candidates whose surface form is empty after trimming.
            if (surfaceFormOccurrence.surfaceForm().name().trim().length()==0) {
                LOG.warn("I have an occurrence with empty surface form. :-O Ignoring.");
                LOG.error(surfaceFormOccurrence);
                continue;
            }

            // The tagged tokens are read from a TaggedText context further down. When the context
            // is of another type the candidate is kept without applying any filter or classifier.
            if (! (surfaceFormOccurrence.context() instanceof TaggedText)) { //FIXME added this to avoid breaking, but code below will never run if we don't pass the taggedtext
                LOG.error(String.format("SurfaceFormOccurrence did not contain TaggedText. Cannot apply %s",this.getClass()));
       
                selectedOccurrences.add(surfaceFormOccurrence);
                continue;
            }


      if(unigramFilter.applies(surfaceFormOccurrence)) {

        /**
         * Unigram (n = 1)
         */

        if(!filterPOS.applies(surfaceFormOccurrence)) {

          /**
           * The Surface Form is on the POS blacklist, i.e. a single adjective,
           * verb, etc.
           */


          // Blacklisted candidates get a second chance only when they start with an uppercase letter.
          if(Character.isUpperCase(surfaceFormOccurrence.surfaceForm().name().charAt(0))){
            // NOTE(review): get(0) presumably throws if no tagged token is returned for the
            // occurrence -- confirm that the provider always yields at least one token here.
            TaggedToken taggedToken = ((TaggedText) surfaceFormOccurrence.context()).taggedTokenProvider().getTaggedTokens(surfaceFormOccurrence).get(0);

            /**
             * Add uppercase adjectives (e.g. Canadian tv star)
             */
            // NOTE(review): an uppercase candidate whose tag is null or does not start with "j"
            // is neither selected nor recorded in `decisions` -- confirm this silent drop is intended.
            if(taggedToken.getPOSTag() != null && taggedToken.getPOSTag().startsWith("j"))
              selectedOccurrences.add(surfaceFormOccurrence);

          }else{
            decisions.add("Dropped by POS filter: " + surfaceFormOccurrence);

          }

        }else if(!filterPattern.applies(surfaceFormOccurrence)){
          decisions.add("Dropped by Pattern filter: " + surfaceFormOccurrence);
        }else{

                    // Passed both filters: the unigram classifier makes the final decision.
                    SpotClassification spotClassification;
                    try {
                        spotClassification = unigramClassifier.classify(surfaceFormOccurrence);

                        if(spotClassification.getCandidateClass() == SpotClass.valid) {
                            selectedOccurrences.add(surfaceFormOccurrence);
                            //LOG.info(("Kept by UnigramClassifier (Confidence: " + spotClassification.getConfidence() + "): " + surfaceFormOccurrence);
                        }else{
                            decisions.add("Dropped by UnigramClassifier (Confidence: " + spotClassification.getConfidence() + "): " + surfaceFormOccurrence);
                        }

                    } catch (Exception e) {
                        // Only the exception's string form is logged. The candidate is neither
                        // selected nor recorded in `decisions` in this case.
                        LOG.error("Exception when classifying unigram candidate: " + e);
                    }

        }


      }else{

        /**
         * n > 1
         */

        SpotClassification spotClassification;
        try{
          spotClassification = ngramClassifier.classify(surfaceFormOccurrence);
        }catch (Exception e) {
                    // Classification failed: log the exception's string form and move on to the
                    // next candidate without selecting this one.
                    LOG.error("Exception when classifying ngram candidate: " + e);
                    continue;
        }

View Full Code Here

  public ClassifierFactory(String unigramModelFile, String ngramModelFile,
               String occurrenceDataSource, OccurrenceDataProvider dataProvider)
      throws InitializationException {

    //Create the unigram classifier:
    classifierUnigram = new SpotClassifier(
        unigramModelFile, dataProvider,
        InstanceBuilderFactory.createInstanceBuilderUnigram(occurrenceDataSource, dataProvider));
    //classifierUnigram.setVerboseMode(true);

    //Create the n-gram classifier:
    classifierNGram = new SpotClassifier(
        ngramModelFile, dataProvider,
        InstanceBuilderFactory.createInstanceBuilderNGram(occurrenceDataSource, dataProvider));
    //classifierNGram.setVerboseMode(true);

  }
View Full Code Here

TOP

Related Classes of org.dbpedia.spotlight.spot.cooccurrence.classification.SpotClassifier

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.