Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.Sentence


    public static final void setup() throws IOException {
        ci = ciFactory.createContentItem(new StringSource(text));
        textBlob = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
        analysedTextWithData = createAnalysedText();
        int sentence = text.indexOf('.')+1;
        Sentence sent1 = analysedTextWithData.addSentence(0, sentence);
        expectedSentences.put(sent1, "The Stanbol enhancer can detect famous " +
            "cities such as Paris and people such as Bob Marley.");
       
        Token the = sent1.addToken(0, 3);
        expectedTokens.put(the, "The");
        the.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PREP",Pos.Preposition), 0.85));
       
        Token stanbol = sent1.addToken(4,11);
        expectedTokens.put(stanbol, "Stanbol");
        stanbol.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PN", Pos.ProperNoun),0.95));
        stanbol.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, Value.value(
            0.5));
       
        //use index to create Tokens
        int enhancerStart = sent1.getSpan().toString().indexOf("enhancer");
        Token enhancer = sent1.addToken(enhancerStart,enhancerStart+"enhancer".length());
        expectedTokens.put(enhancer, "enhancer");
        enhancer.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PN", Pos.ProperNoun),0.95));
        enhancer.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("N", LexicalCategory.Noun),0.87));
View Full Code Here


        //first the sentences
        TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText()));
        try {
            while(sentences.incrementToken()){
                OffsetAttribute offset = sentences.addAttribute(OffsetAttribute.class);
                Sentence s = at.addSentence(offset.startOffset(), offset.endOffset());
                if(log.isTraceEnabled()) {
                    log.trace("detected {}:{}",s,s.getSpan());
                }
            }
        } catch (IOException e) {
            String message = String.format("IOException while reading from "
                +"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());
View Full Code Here

            } else {
                throw RuntimeException.class.cast(e);
            }
        }
        Iterator<Span> spans = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,SpanTypeEnum.Chunk));
        Sentence context = null;
        MGraph metadata = ci.getMetadata();
        Language lang = new Language(language);
        LiteralFactory lf = LiteralFactory.getInstance();
        ci.getLock().writeLock().lock();
        try { //write TextAnnotations for Named Entities
            while(spans.hasNext()){
                Span span = spans.next();
                switch (span.getType()) {
                    case Sentence:
                        context = (Sentence)context;
                        break;
                    default:
                        Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
                        if(nerAnno != null){
                            UriRef ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                            //add span related data
                            metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT,
                                new PlainLiteralImpl(span.getSpan(), lang)));
                            metadata.add(new TripleImpl(ta, ENHANCER_START,
                                lf.createTypedLiteral(span.getStart())));
                            metadata.add(new TripleImpl(ta, ENHANCER_END,
                                lf.createTypedLiteral(span.getEnd())));
                            metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT,
                                new PlainLiteralImpl(context == null ?
                                        getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
                                            context.getSpan(), lang)));
                            //add the NER type
                            if(nerAnno.value().getType() != null){
                                metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
                            }
                            if(nerAnno.probability() >= 0) {
View Full Code Here

        String language = getLanguage(this, ci, true);
        SentenceDetector sentenceDetector = getSentenceDetector(language);
        if(sentenceDetector != null){
            for(opennlp.tools.util.Span sentSpan : sentenceDetector.sentPosDetect(at.getSpan())) {
                //detect sentences and add it to the AnalyzedText.
                Sentence sentence = at.addSentence(sentSpan.getStart(), sentSpan.getEnd());
                log.trace(" > add {}",sentence);
            }
        } else {
            log.warn("SentenceDetector model for language {} is no longer available. "
                + "This might happen if the model becomes unavailable during enhancement. "
View Full Code Here

            //first the sentences
            TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText()));
            try {
                while(sentences.incrementToken()){
                    OffsetAttribute offset = sentences.addAttribute(OffsetAttribute.class);
                    Sentence s = at.addSentence(offset.startOffset(), offset.endOffset());
                    if(log.isTraceEnabled()) {
                        log.trace("detected {}:{}",s,s.getSpan());
                    }
                }
            } catch (IOException e) {
                String message = String.format("IOException while reading from "
                    +"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());
View Full Code Here

        SentenceDetector sentenceDetector = getSentenceDetector(language);
        List<Section> sentences;
        if(sentenceDetector != null){
            sentences = new ArrayList<Section>();
            for(opennlp.tools.util.Span sentSpan : sentenceDetector.sentPosDetect(at.getSpan())) {
                Sentence sentence = at.addSentence(sentSpan.getStart(), sentSpan.getEnd());
                log.trace(" > add {}",sentence);
                sentences.add(sentence);
            }
        } else {
            sentences = null;
View Full Code Here

TOP

Related Classes of org.apache.stanbol.enhancer.nlp.model.Sentence

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.