Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

org.apache.stanbol.enhancer.nlp.model.AnalysedText
Provides access to NLP processing results of the text/plain {@link Blob} of a ContentItem. Intended to be {@link ContentItem#addPart(org.apache.clerezza.rdf.core.UriRef,Object) added as ContentPart} by using {@link #ANALYSED_TEXT_URI}. @see ContentItem#addPart(UriRef,Object)

    }


    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        String language = NlpEngineHelper.getLanguage(this, ci, true);
        AnalysedText at = NlpEngineHelper.getAnalysedText(this, ci, true);
        //configure the spanTypes based on the configuration
//        EnumSet<Span.SpanTypeEnum> spanTypes = EnumSet.noneOf(SpanTypeEnum.class);
//        if(writeSentimentPhrases){
//            spanTypes.add(SpanTypeEnum.Chunk);
//        }

View Full Code Here

    
    @Test
    public void testEngine() throws IOException, EngineException {
        ContentItem ci = ciFactory.createContentItem(new StringSource(text));
        Assert.assertNotNull(ci);
        AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
        Assert.assertNotNull(at);
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
        Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(ci));
        
        Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC,engine.canEnhance(ci));
        //compute the enhancements
        try {
            engine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return; //deactivate test
        }
        //now validate the enhancements
        int sentimentExpressionCnt=0;
        for(Iterator<Token> tokens = at.getTokens(); tokens.hasNext();){
            Token token = tokens.next();
            log.info("Token: {}",token);
            List<Value<Double>> sentimentExpressionsList = token.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
            if(sentimentExpressionsList!=null && sentimentExpressionsList.size()>0)
              sentimentExpressionCnt++;

View Full Code Here

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText analysedText = getAnalysedText(this,ci, true);
        String language = getLanguage(this, ci, true);
        SentimentClassifier classifier = classifiers.get(language);
        if(classifier == null){
            throw new IllegalStateException("Sentiment Classifier for language '"
                + language +"' not available. As this is also checked in "
                + " canEnhance this may indicate an Bug in the used "
                + "EnhancementJobManager!");
        }
        //TODO: locking for AnalysedText not yet defined
//        ci.getLock().writeLock().lock();
//        try {
        Iterator<Token> tokens = analysedText.getTokens();
        while(tokens.hasNext()){
            Token token = tokens.next();
            Set<LexicalCategory> cats = null;
            boolean process = false;
            if(!adjectivesOnly){

View Full Code Here

    
    @Test
    public void testEngineDe() throws IOException, EngineException {
        ContentItem ci = ciFactory.createContentItem(new StringSource(de_text));
        Assert.assertNotNull(ci);
        AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
        Assert.assertNotNull(at);
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
        Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
        
        //Add some Tokens with POS annotations to test the usage of
        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
        verbrachten.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("V",LexicalCategory.Verb), de_verbProb));
        
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length()); 
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("ADJ",LexicalCategory.Adjective), de_adjectiveProb));
        
        Token urlaub = at.addToken(de_nounStart,de_nounStart+de_noun.length()); 
        urlaub.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("NC",LexicalCategory.Noun), de_nounProb));
        
        Assert.assertEquals("Can not enhance Test ContentItem",
            EnhancementEngine.ENHANCE_ASYNC,engine.canEnhance(ci));
        //compute the enhancements
        try {
            engine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return; //deactivate test
        }
        //now validate the enhancements
        boolean foundVerb = false;
        boolean foundAdjective = false;
        boolean foundNoun = false;
        for(Iterator<Token> tokens = at.getTokens(); tokens.hasNext();){
            Token token = tokens.next();
            log.info("Token: {}",token);
            List<Value<MorphoFeatures>> mfs = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
            if(de_verb.equals(token.getSpan())){
                foundVerb = !mfs.isEmpty();

View Full Code Here

        return ENHANCE_ASYNC;
    }
    
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = getAnalysedText(this, ci, true);
        String language = getLanguage(this, ci, true);
        isLangaugeConfigured(this, languageConfig, language, true);
        List<SentimentExpression> seList;
        try {
          seList = this.client.extractSentimentExpressions(at.getSpan(), language);
        } catch (IOException e) {
            throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
        } catch (SOAPException e) {
            throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
        }
        
        for(SentimentExpression se : seList){
            //Add the Sentiment Expression as Token to the Text. NOTE that if a Token with the same start/end positions already exist this
            //Method returns the existing instance
            Token token = at.addToken(se.getStartSnippet(),se.getEndSnippet());
            token.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, new Value<Double>(se.getSentimentPolarityAsDoubleValue()) );
        }
    }

View Full Code Here

    public void setupTest() throws IOException {
        //create a contentItem for the plain text used for testing
        InputStream is = FstLinkingEngineTest.class.getClassLoader().getResourceAsStream(TEST_TEXT_FILE);
        Assert.assertNotNull("Unable to load '"+TEST_TEXT_FILE+"' via classpath",is);
        ContentItem ci = cif.createContentItem(new StreamSource(is,"text/plain"));
        AnalysedText at = atf.createAnalysedText(ci, ci.getBlob());
        is.close();
        //parse the prepared NLP results and add it to the ContentItem
        is = FstLinkingEngineTest.class.getClassLoader().getResourceAsStream(TEST_TEXT_NLP_FILE);
        Assert.assertNotNull("Unable to load '"+TEST_TEXT_NLP_FILE+"' via classpath",is);
        AnalyzedTextParser.getDefaultInstance().parse(is, Charset.forName("UTF-8"), at);
        is.close();
        //set the language of the contentItem
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, 
            EN_LANGUAGE));
        //set the contentItem and also the content
        this.ci = ci;
        this.content = at.getText().toString();
    }

View Full Code Here

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = initAnalysedText(this,analysedTextFactory,ci);
        String language = getLanguage(this, ci, true);
        
        Tokenizer tokenizer = getTokenizer(language);
        if(tokenizer == null){
            log.warn("Tokenizer for language {} is no longer available. "
                    + "This might happen if the model becomes unavailable during enhancement. "
                    + "If this happens more often it might also indicate an bug in the used "
                    + "EnhancementJobManager implementation as the availability is also checked "
                    + "in the canEnhance(..) method of this Enhancement Engine.");
            return;
        }
        //Try to use sentences for tokenizing
        Iterator<? extends Section> sections = at.getSentences();
        if(!sections.hasNext()){
            //if no sentences are annotated
            sections = Collections.singleton(at).iterator();
        }

View Full Code Here

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = getAnalysedText(this, ci, true);
        String language = getLanguage(this, ci, true);
        isLangaugeConfigured(this, languageConfiguration, language, true);
        //init the PhraseBuilder
        ChunkFactory chunkFactory = new ChunkFactoryImpl(at, ci.getLock());
        List<PhraseBuilder> phraseBuilders = new ArrayList<PhraseBuilder>(phraseTypeDefinitions.size());
        for(PhraseTypeDefinition ptd : phraseTypeDefinitions){
            phraseBuilders.add(new PhraseBuilder(ptd, chunkFactory, minPosScore));
        }
        Iterator<? extends Section> sentences = at.getSentences();
        if(!sentences.hasNext()){ //no sentences ... iterate over the whole text
            sentences = Collections.singleton(at).iterator();
        }
        while(sentences.hasNext()){
            // (1) get Tokens and POS information for the sentence

View Full Code Here

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = initAnalysedText(this,analysedTextFactory,ci);
        String language = getLanguage(this, ci, true);
        SentenceDetector sentenceDetector = getSentenceDetector(language);
        if(sentenceDetector != null){
            for(opennlp.tools.util.Span sentSpan : sentenceDetector.sentPosDetect(at.getSpan())) {
                //detect sentences and add it to the AnalyzedText.
                Sentence sentence = at.addSentence(sentSpan.getStart(), sentSpan.getEnd());
                log.trace(" > add {}",sentence);
            }
        } else {
            log.warn("SentenceDetector model for language {} is no longer available. "
                + "This might happen if the model becomes unavailable during enhancement. "

View Full Code Here

        Map<UriRef,Resource> expected = new HashMap<UriRef,Resource>();
        expected.put(Properties.DC_CREATOR, lf.createTypedLiteral(engine.getClass().getName()));
        expected.put(Properties.ENHANCER_EXTRACTED_FROM,contentItem.getUri());
        Assert.assertEquals(16, EnhancementStructureHelper.validateAllTextAnnotations(
            contentItem.getMetadata(), text, expected));
        AnalysedText at = AnalysedTextUtils.getAnalysedText(contentItem);
        Assert.assertNotNull(at);
        List<Sentence> sentences = AnalysedTextUtils.asList(at.getSentences());
        Assert.assertNotNull(sentences);
        Assert.assertEquals(7, sentences.size());
        //TODO: values in the following arrays are based on the first run of the
        // engine. So this is only to detect changes in results. It can not validate
        // that the tokenization and NER detections are correct - sorry I do not

View Full Code Here

0 1 2 3 4

TOP

Related Classes of org.apache.stanbol.enhancer.nlp.model.AnalysedText

org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliAnalyzedTextLemmatizerEngine

org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliAnalyzedTextLemmatizerEngineTest

org.apache.stanbol.enhancer.engines.celi.sentimentanalysis.impl.CeliAnalyzedTextSentimentAnalysisEngine

org.apache.stanbol.enhancer.engines.celi.sentimentanalysis.impl.CeliAnalyzedTextSentimentAnalysisEngineTest

org.apache.stanbol.enhancer.engines.entitycomention.EntityCoMentionEngine

org.apache.stanbol.enhancer.engines.entitylinking.engine.EntityLinkingEngine

org.apache.stanbol.enhancer.engines.kuromoji.impl.KuromojiNlpEngine

org.apache.stanbol.enhancer.engines.kuromoji.impl.TestKuromojiNlpEngine

org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngine

org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.