Examples of AnalysedText

org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText
org.apache.stanbol.enhancer.nlp.model.AnalysedText
Provides access to NLP processing results of the text/plain {@link Blob} of a ContentItem. Intended to be {@link ContentItem#addPart(org.apache.clerezza.rdf.core.UriRef,Object) added as ContentPart} by using {@link #ANALYSED_TEXT_URI}. @see ContentItem#addPart(UriRef,Object)

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText




    @Test
    public void testSingleSentenceDefaultConfig(){
        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE);
        AnalysedText analysed = analyzer.analyseSentence(SINGLE_SENTENCE);
        assertNotNull(analysed);
        //check the default config
        assertFalse(analyzer.getConfig().isSimpleTokenizerForced());
        assertTrue(analyzer.getConfig().isPosTaggerEnable());
        assertTrue(analyzer.getConfig().isPosTypeChunkerEnabled());

View Full Code Here

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText

    @Test
    public void testSingleSentenceChunkerConfig(){
        TextAnalyzerConfig config = new TextAnalyzerConfig();
        config.forcePosTypeChunker(false);
        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE,config);
        AnalysedText analysed = analyzer.analyseSentence(SINGLE_SENTENCE);
        assertNotNull(analysed);
        //check the default config
        assertFalse(analyzer.getConfig().isSimpleTokenizerForced());
        assertTrue(analyzer.getConfig().isPosTaggerEnable());
        assertTrue(analyzer.getConfig().isChunkerEnabled());

View Full Code Here

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText

    @Test
    public void testSingleSentenceNoChunkerConfig(){
        TextAnalyzerConfig config = new TextAnalyzerConfig();
        config.enableChunker(false);
        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE,config);
        AnalysedText analysed = analyzer.analyseSentence(SINGLE_SENTENCE);
        assertNotNull(analysed);
        //check the default config
        assertFalse(analyzer.getConfig().isSimpleTokenizerForced());
        assertTrue(analyzer.getConfig().isPosTaggerEnable());
        assertFalse(analyzer.getConfig().isChunkerEnabled());

View Full Code Here

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText

    public void testSingleSentenceNoChunkerNoPosConfig(){
        TextAnalyzerConfig config = new TextAnalyzerConfig();
        config.enablePosTagger(false);
        config.enableChunker(true);//must be ignored for Chunks if no Pos
        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE,config);
        AnalysedText analysed = analyzer.analyseSentence(SINGLE_SENTENCE);
        assertNotNull(analysed);
        //check the default config
        assertFalse(analyzer.getConfig().isSimpleTokenizerForced());
        assertFalse(analyzer.getConfig().isPosTaggerEnable());
        assertTrue(analyzer.getConfig().isChunkerEnabled());

View Full Code Here

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText

        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE);
        Iterator<AnalysedText> analysedSentences = analyzer.analyse(MULTI_SENTENCES);
        assertNotNull(analysedSentences);
        int sentenceCount = 0;
        while(analysedSentences.hasNext()){
            AnalysedText analysed = analysedSentences.next();
            checkSingleSentence(analysed, MULTIPLE_SENTENCE_TOKENS[sentenceCount], true, true);
            sentenceCount++;
        }
        assertTrue(sentenceCount == 3);
    }

View Full Code Here

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText




    @Test
    public void testSingleSentenceDefaultConfig(){
        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE);
        AnalysedText analysed = analyzer.analyseSentence(SINGLE_SENTENCE);
        assertNotNull(analysed);
        //check the default config
        assertFalse(analyzer.getConfig().isSimpleTokenizerForced());
        assertTrue(analyzer.getConfig().isPosTaggerEnable());
        assertTrue(analyzer.getConfig().isPosTypeChunkerEnabled());

View Full Code Here

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText

    @Test
    public void testSingleSentenceChunkerConfig(){
        TextAnalyzerConfig config = new TextAnalyzerConfig();
        config.forcePosTypeChunker(false);
        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE,config);
        AnalysedText analysed = analyzer.analyseSentence(SINGLE_SENTENCE);
        assertNotNull(analysed);
        //check the default config
        assertFalse(analyzer.getConfig().isSimpleTokenizerForced());
        assertTrue(analyzer.getConfig().isPosTaggerEnable());
        assertTrue(analyzer.getConfig().isChunkerEnabled());

View Full Code Here

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText

    @Test
    public void testSingleSentenceNoChunkerConfig(){
        TextAnalyzerConfig config = new TextAnalyzerConfig();
        config.enableChunker(false);
        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE,config);
        AnalysedText analysed = analyzer.analyseSentence(SINGLE_SENTENCE);
        assertNotNull(analysed);
        //check the default config
        assertFalse(analyzer.getConfig().isSimpleTokenizerForced());
        assertTrue(analyzer.getConfig().isPosTaggerEnable());
        assertFalse(analyzer.getConfig().isChunkerEnabled());

View Full Code Here

Examples of org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText

    public void testSingleSentenceNoChunkerNoPosConfig(){
        TextAnalyzerConfig config = new TextAnalyzerConfig();
        config.enablePosTagger(false);
        config.enableChunker(true);//must be ignored for Chunks if no Pos
        TextAnalyzer analyzer = new TextAnalyzer(openNLP,LANGUAGE,config);
        AnalysedText analysed = analyzer.analyseSentence(SINGLE_SENTENCE);
        assertNotNull(analysed);
        //check the default config
        assertFalse(analyzer.getConfig().isSimpleTokenizerForced());
        assertFalse(analyzer.getConfig().isPosTaggerEnable());
        assertTrue(analyzer.getConfig().isChunkerEnabled());

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText

     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        final AnalysedText at = initAnalysedText(this,analysedTextFactory,ci);


        String language = getLanguage(this,ci,false);
        if(!("zh".equals(language) || (language != null && language.startsWith("zh-")))) {
            throw new IllegalStateException("The detected language is NOT 'zh'! "
                + "As this is also checked within the #canEnhance(..) method this "
                + "indicates an Bug in the used EnhancementJobManager implementation. "
                + "Please report this on the dev@apache.stanbol.org or create an "
                + "JIRA issue about this.");
        }
        if(!at.getSentences().hasNext()) { //no sentences  ... use this engine to detect
            //first the sentences
            TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText()));
            try {
                while(sentences.incrementToken()){
                    OffsetAttribute offset = sentences.addAttribute(OffsetAttribute.class);
                    Sentence s = at.addSentence(offset.startOffset(), offset.endOffset());
                    if(log.isTraceEnabled()) {
                        log.trace("detected {}:{}",s,s.getSpan());
                    }
                }
            } catch (IOException e) {
                String message = String.format("IOException while reading from "
                    +"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());
                log.error(message,e);
                throw new EngineException(this, ci, message, e);
            }
        }
        //now the tokens
        TokenStream tokens = new WordTokenFilter(new AnalyzedTextSentenceTokenizer(at));
        try {
          tokens.reset();
            while(tokens.incrementToken()){
                OffsetAttribute offset = tokens.addAttribute(OffsetAttribute.class);
                Token t = at.addToken(offset.startOffset(), offset.endOffset());
                log.trace("detected {}",t);
            }
        } catch (IOException e) {
            String message = String.format("IOException while reading from "
                +"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.