Examples of org.apache.stanbol.enhancer.nlp.model.Chunk

Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.Chunk

org.apache.stanbol.enhancer.nlp.model.Chunk

        morpho.addTense(new TenseTag("test-tense", Tense.Present));
        morpho.addVerbForm(new VerbMoodTag("test-verb-mood", VerbMood.ConditionalVerb));
        enhancer.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, Value.value(morpho));


        //create a chunk
        Chunk stanbolEnhancer = analysedTextWithData.addChunk(stanbol.getStart(), enhancer.getEnd());
        expectedChunks.put(stanbolEnhancer, "Stanbol enhancer");
        stanbolEnhancer.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(
            new NerTag("organization", DBPEDIA_ORGANISATION)));
        stanbolEnhancer.addAnnotation(NlpAnnotations.PHRASE_ANNOTATION, Value.value(
            new PhraseTag("NP", LexicalCategory.Noun),0.98));


    }

View Full Code Here

                endIndex--;
            }
            lastConsumedToken = current.get(endIndex);
            //NOTE: ignore phrases with a single token
            if(endIndex > 0){
                Chunk chunk = chunkFactory.createChunk(current.get(0), lastConsumedToken);
                //TODO: add support for confidence
                chunk.addAnnotation(PHRASE_ANNOTATION, Value.value(phraseTag));
                if(log.isTraceEnabled()){
                  log.trace("  << add {} phrase {} '{}'", new Object[]{
                      phraseType.getPhraseType().name(), chunk,chunk.getSpan()});
                }
            } else if(log.isTraceEnabled()){
              log.trace("  >> ignore {} phrase with single {} ", 
                  phraseType.getPhraseType().name() ,
                  current.get(0));

View Full Code Here

                token.addAnnotation(POS_ANNOTATION, Value.value(posTag));
                //NER
                NerTag nerTag = NER_TAG_SET.getTag(posAttr.getPartOfSpeech());
                if(ner != null && (nerTag == null || !ner.tag.getType().equals(nerTag.getType()))){
                    //write NER annotation
                    Chunk chunk = at.addChunk(ner.start, ner.end);
                    chunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(ner.tag));
                    //NOTE that the fise:TextAnnotation are written later based on the nerList
                    //clean up
                    ner = null;
                }
                if(nerTag != null){

View Full Code Here

                    boolean start = chunkTags[i].charAt(0) == 'B';
                    boolean end = tag != null && (start || chunkTags[i].charAt(0) == 'O');
                    if(end){ //add the current phrase
                        //add at AnalysedText level, because offsets are absolute
                        //NOTE we are already at the next token when we detect the end
                        Chunk chunk = at.addChunk( 
                            tokenList.get(i-chunkTokenCount).getStart(), 
                            tokenList.get(i-1).getEnd());
                        chunk.addAnnotation(PHRASE_ANNOTATION, 
                            new Value<PhraseTag>(tag,
                                    chunkProps/(double)chunkTokenCount));
                        //reset the state
                        tag = null;
                        chunkTokenCount = 0;
                        chunkProps = 0;
                    }
                    if(start){ //create the new tag
                        tag = getPhraseTag(tagSet,adhocTags,
                            chunkTags[i].substring(2), language); //skip 'B-'
                        
                    }
                    if(tag != null){ //count this token for the current chunk
                        chunkProps = chunkProps + chunkProb[i];
                        chunkTokenCount++;
                    }
                }
                if(tag != null){
                    Chunk chunk = at.addChunk( 
                        tokenList.get(i-chunkTokenCount).getStart(), 
                        tokenList.get(i-1).getEnd());
                    chunk.addAnnotation(PHRASE_ANNOTATION, 
                        new Value<PhraseTag>(tag,
                                chunkProps/(double)chunkTokenCount));
                    
                }
                // (4) clean up

View Full Code Here

        morpho.addTense(new TenseTag("test-tense", Tense.Present));
        morpho.addVerbForm(new VerbMoodTag("test-verb-mood", VerbMood.ConditionalVerb));
        enhancer.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, Value.value(morpho));


        //create a chunk
        Chunk stanbolEnhancer = analysedTextWithData.addChunk(stanbol.getStart(), enhancer.getEnd());
        expectedChunks.put(stanbolEnhancer, "Stanbol enhancer");
        stanbolEnhancer.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(
            new NerTag("organization", DBPEDIA_ORGANISATION)));
        stanbolEnhancer.addAnnotation(NlpAnnotations.PHRASE_ANNOTATION, Value.value(
            new PhraseTag("NP", LexicalCategory.Noun),0.98));


    }

View Full Code Here

                    occurrences = new ArrayList<NameOccurrence>();
                }
                occurrences.add(occurrence);
                nameOccurrences.put(name, occurrences);
                //add also the NerAnnotation to the AnalysedText
                Chunk chunk = at.addChunk(start, end);
                //TODO: build AnnotationModel based on the configured Mappings
                chunk.addAnnotation(NER_ANNOTATION, Value.value(nerTag, confidence));
            }
        }
        finder.clearAdaptiveData();
        log.debug("{} name occurrences found: {}", nameOccurrences.size(), nameOccurrences);
        return nameOccurrences;

View Full Code Here

                    boolean start = chunkTags[i].charAt(0) == 'B';
                    boolean end = tag != null && (start || chunkTags[i].charAt(0) == 'O');
                    if(end){ //add the current phrase
                        //add at AnalysedText level, because offsets are absolute
                        //NOTE we are already at the next token when we detect the end
                        Chunk chunk = at.addChunk( 
                            tokenList.get(i-chunkTokenCount).getStart(), 
                            tokenList.get(i-1).getEnd());
                        chunk.addAnnotation(PHRASE_ANNOTATION, 
                            new Value<PhraseTag>(tag,
                                    chunkProps/(double)chunkTokenCount));
                        //reset the state
                        tag = null;
                        chunkTokenCount = 0;
                        chunkProps = 0;
                    }
                    if(start){ //create the new tag
                        tag = getPhraseTag(tagSet,adhocTags,
                            chunkTags[i].substring(2), language); //skip 'B-'
                        
                    }
                    if(tag != null){ //count this token for the current chunk
                        chunkProps = chunkProps + chunkProb[i];
                        chunkTokenCount++;
                    }
                }
                if(tag != null){
                    Chunk chunk = at.addChunk( 
                        tokenList.get(i-chunkTokenCount).getStart(), 
                        tokenList.get(i-1).getEnd());
                    chunk.addAnnotation(PHRASE_ANNOTATION, 
                        new Value<PhraseTag>(tag,
                                chunkProps/(double)chunkTokenCount));
                    
                }
                // (4) clean up

View Full Code Here

TOP

Related Classes of org.apache.stanbol.enhancer.nlp.model.Chunk

org.apache.stanbol.enhancer.engines.kuromoji.impl.KuromojiNlpEngine

org.apache.stanbol.enhancer.engines.opennlp.chunker.services.OpenNlpChunkingEngine

org.apache.stanbol.enhancer.engines.opennlp.impl.NEREngineCore

org.apache.stanbol.enhancer.engines.poschunker.PhraseBuilder

org.apache.stanbol.enhancer.nlp.json.AnalyzedTextSerializerAndParserTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.