Examples of org.apache.stanbol.enhancer.nlp.model.Token

Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.Token

org.apache.stanbol.enhancer.nlp.model.Token

        while(sections.hasNext()){
            Section section = sections.next();
            //Tokenize section
            opennlp.tools.util.Span[] tokenSpans = tokenizer.tokenizePos(section.getSpan());
            for(int i=0;i<tokenSpans.length;i++){
                Token token = section.addToken(tokenSpans[i].getStart(), tokenSpans[i].getEnd());
                log.trace(" > add {}",token);
            }
        }
    }

View Full Code Here

                continue; //ignore terms without readings
            }
            //Add the LexicalEntry as a Token to the Text. NOTE that if a
            //Token with the same start/end positions already exists, this
            //method returns the existing instance
            Token token = at.addToken(term.getFrom(), term.getTo());
            //Now try to get POS annotations for the Token
            for(Value<PosTag> posAnno : token.getAnnotations(NlpAnnotations.POS_ANNOTATION)){
                if(posAnno.value().isMapped()){
                    for(LexicalCategory cat :posAnno.value().getCategories()){
                        if(!tokenLexCats.containsKey(cat)){ //do not override with lower prob
                            tokenLexCats.put(cat, posAnno.probability());
                        }
                    }
                }
            }
            for(Reading reading : term.getTermReadings()){
                MorphoFeatures mf = CeliMorphoFeatures.parseFrom(reading, language);
                //add the readings (MorphoFeatures)
                if(mf != null){
                    //use the POS tags of the morpho analysis and compare it
                    //with existing POS tags.
                    double posProbability = -1;
                    Set<LexicalCategory> mfCats = EnumSet.noneOf(LexicalCategory.class);
                    for(PosTag mfPos : mf.getPosList()){
                        mfCats.addAll(mfPos.getCategories());
                    }
                    for(LexicalCategory mfCat : mfCats){
                        Double prob = tokenLexCats.get(mfCat);
                        if(prob != null && posProbability < prob){
                            posProbability = prob;
                        }
                    }
                    //add the morpho features with the posProbability
                    Value<MorphoFeatures> value = Value.value(mf,
                        posProbability < 0 ? Value.UNKNOWN_PROBABILITY : posProbability);
                    token.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, value);
                }
            }
        }
    }

View Full Code Here

        }
        
        for(SentimentExpression se : seList){
            //Add the Sentiment Expression as a Token to the Text. NOTE that if a Token with the same start/end positions already exists, this
            //method returns the existing instance
            Token token = at.addToken(se.getStartSnippet(),se.getEndSnippet());
            token.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, new Value<Double>(se.getSentimentPolarityAsDoubleValue()) );
        }
    }

View Full Code Here

            return; //deactivate test
        }
        //now validate the enhancements
        int sentimentExpressionCnt=0;
        for(Iterator<Token> tokens = at.getTokens(); tokens.hasNext();){
            Token token = tokens.next();
            log.info("Token: {}",token);
            List<Value<Double>> sentimentExpressionsList = token.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
            if(sentimentExpressionsList!=null && sentimentExpressionsList.size()>0)
              sentimentExpressionCnt++;
        }
       
        Assert.assertTrue("2 sentiment expressions should be recognized in: "+text,sentimentExpressionCnt==2);

View Full Code Here

        //now the tokens
        TokenStream tokens = new WordTokenFilter(new AnalyzedTextSentenceTokenizer(at));
        try {
            while(tokens.incrementToken()){
                OffsetAttribute offset = tokens.addAttribute(OffsetAttribute.class);
                Token t = at.addToken(offset.startOffset(), offset.endOffset());
                log.trace("detected {}",t);
            }
        } catch (IOException e) {
            String message = String.format("IOException while reading from "
                +"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());

View Full Code Here

        //      best match for all followings.
        //      We do not want such copies.
        PosTag[] actPos = new PosTag[posSequences.length];
        double[] actProp = new double[posSequences.length];
        for(int i=0;i<tokenTexts.length;i++){
            Token token = tokenList.get(i);
            boolean done = false;
            int j = 0;
            while( j < posSequences.length && !done){
                String p = posSequences[j].getOutcomes().get(i);
                done = j > 0 && p.equals(actPos[0].getTag());
                if(!done){
                    actPos[j] = getPosTag(posModel,adhocTags,p,language);
                    actProp[j] = posSequences[j].getProbs()[i];
                    j++;
                }
            }
            //create the POS values
            token.addAnnotations(POS_ANNOTATION, Value.values(actPos, actProp,j));
        }


    }

View Full Code Here

        Tokenizer tokenizer = getTokenizer(langauge);
        String text = section.getSpan();
        List<Token> tokens = new ArrayList<Token>(text.length()/5); //assume avr. token length is 5
        opennlp.tools.util.Span[] tokenSpans = tokenizer.tokenizePos(section.getSpan());
        for(int i=0;i<tokenSpans.length;i++){
            Token token = section.addToken(tokenSpans[i].getStart(), tokenSpans[i].getEnd());
            log.trace(" > add {}",token);
            tokens.add(token);
        }
        return tokens;
    }

View Full Code Here

0 1 2

TOP

Related Classes of org.apache.stanbol.enhancer.nlp.model.Token

org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliAnalyzedTextLemmatizerEngine

org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliAnalyzedTextLemmatizerEngineTest

org.apache.stanbol.enhancer.engines.celi.sentimentanalysis.impl.CeliAnalyzedTextSentimentAnalysisEngine

org.apache.stanbol.enhancer.engines.celi.sentimentanalysis.impl.CeliAnalyzedTextSentimentAnalysisEngineTest

org.apache.stanbol.enhancer.engines.kuromoji.impl.KuromojiNlpEngine

org.apache.stanbol.enhancer.engines.opennlp.chunker.services.OpenNlpChunkingEngine

org.apache.stanbol.enhancer.engines.opennlp.impl.NEREngineCore

org.apache.stanbol.enhancer.engines.opennlp.pos.services.OpenNlpPosTaggingEngine

org.apache.stanbol.enhancer.engines.opennlp.token.impl.OpenNlpTokenizerEngine

org.apache.stanbol.enhancer.engines.poschunker.engine.PosChunkerEngine

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.