Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.Span
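
All of the snippets below rely on the same small Span contract: getType() returns the SpanTypeEnum (Text, Sentence, Chunk or Token), getStart() and getEnd() return character offsets, getSpan() returns the covered text, and getAnnotation(...) gives access to NLP annotations. The following is a minimal sketch of that pattern, assuming SpanTypeEnum and AnalysedText live in the org.apache.stanbol.enhancer.nlp.model package and that the class name SpanIterationSketch is purely illustrative (exact import locations may differ between Stanbol versions):

import java.util.EnumSet;
import java.util.Iterator;

import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Span;
import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum;

public class SpanIterationSketch {

    /**
     * Prints type, character offsets and covered text of every Span
     * enclosed by the given AnalysedText.
     */
    public static void dumpSpans(AnalysedText at) {
        //request all span types (Text, Sentence, Chunk and Token)
        Iterator<Span> spans = at.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
        while (spans.hasNext()) {
            Span span = spans.next();
            System.out.println(span.getType() + " [" + span.getStart()
                + "," + span.getEnd() + "]: '" + span.getSpan() + "'");
        }
    }
}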


    /**
     * Iterates over all {@link Span}s enclosed by this section (e.g.
     * over the {@link Sentence}s of an {@link AnalysedText})
     * @return the iterator
     */
    protected Iterator<Span> getIterator(){
        //the end of this section
        final Span end = new SubSetHelperSpan(getEnd());
        return new Iterator<Span>() {
           
            boolean init = false;
            boolean removed = true;
            private Span span = SectionImpl.this;
           
            @Override
            public boolean hasNext() {
                return getNext() != null;
            }
           
            private Span getNext(){
                Span next = context.spans.higherKey(span);
                return next == null || next.compareTo(end) >= 0 ? null : next;
            }
           
            @Override
            public Span next() {
                init = true;


            atFactory.createAnalysedText(textBlob.getValue()));
        Assert.assertEquals(analysedTextWithData, parsedAt);
        Iterator<Span> origSpanIt = analysedTextWithData.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
        Iterator<Span> parsedSpanIt = parsedAt.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
        while(origSpanIt.hasNext() && parsedSpanIt.hasNext()){
            Span orig = origSpanIt.next();
            Span parsed = parsedSpanIt.next();
            Assert.assertEquals(orig, parsed);
            Set<String> origKeys = orig.getKeys();
            Set<String> parsedKeys = parsed.getKeys();
            Assert.assertEquals(origKeys, parsedKeys);
            for(String key : origKeys){
                List<Value<?>> origValues = orig.getValues(key);
                List<Value<?>> parsedValues = parsed.getValues(key);
                Assert.assertEquals(origValues, parsedValues);
            }
        }
        Assert.assertFalse("Original AnalyzedText MUST NOT have additional Spans",origSpanIt.hasNext());
        Assert.assertFalse("Parsed AnalyzedText MUST NOT have additional Spans",parsedSpanIt.hasNext());

            if(spanType == null || spanPos[0] < 0 || spanPos[1] < 0){
                log.warn("Illegal or missing span type, start and/or end position (ignored, json: "+jSpan);
                return;
            }
            //now create the Span
            Span span;
            switch (spanType) {
                case Text:
                    log.warn("Encounterd 'Text' span that is not the first span in the "
                        + "'spans' array (ignored, json: "+node+")");
                    return;

    protected void assertAnalysedTextEquality(AnalysedText parsedAt) {
        Assert.assertEquals(at, parsedAt);
        Iterator<Span> origSpanIt = at.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
        Iterator<Span> parsedSpanIt = parsedAt.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
        while(origSpanIt.hasNext() && parsedSpanIt.hasNext()){
            Span orig = origSpanIt.next();
            Span parsed = parsedSpanIt.next();
            Assert.assertEquals(orig, parsed);
            Set<String> origKeys = orig.getKeys();
            Set<String> parsedKeys = parsed.getKeys();
            Assert.assertEquals(origKeys, parsedKeys);
            for(String key : origKeys){
                List<Value<?>> origValues = orig.getValues(key);
                List<Value<?>> parsedValues = parsed.getValues(key);
                Assert.assertEquals(origValues, parsedValues);
            }
        }
    }

                if(member.isObject()) {
                    ObjectNode jMention = (ObjectNode)member;
                    SpanTypeEnum spanType = SpanTypeEnum.valueOf(jMention.path(MENTION_TYPE_TAG).getTextValue());
                    int spanStart = jMention.path(MENTION_START_TAG).asInt();
                    int spanEnd = jMention.path(MENTION_END_TAG).asInt();
                    Span mentionedSpan = null;
                   
                    switch (spanType) {
                        case Chunk:
                            mentionedSpan = at.addChunk(spanStart, spanEnd);
                            break;

        GrammaticalRelationTag gramRelTag = relation.getGrammaticalRelationTag();
        jDependencyRelation.put(RELATION_TYPE_TAG, gramRelTag.getTag());
        jDependencyRelation.put(RELATION_STANBOL_TYPE_TAG, gramRelTag.getGrammaticalRelation().ordinal());
        jDependencyRelation.put(RELATION_IS_DEPENDENT_TAG, (relation.isDependent()));

        Span partner = relation.getPartner();
        if (partner != null) {
            jDependencyRelation.put(RELATION_PARTNER_TYPE_TAG, partner.getType().toString());
            jDependencyRelation.put(RELATION_PARTNER_START_TAG, partner.getStart());
            jDependencyRelation.put(RELATION_PARTNER_END_TAG, partner.getEnd());
        } else {
            jDependencyRelation.put(RELATION_PARTNER_TYPE_TAG, ROOT_TAG);
            jDependencyRelation.put(RELATION_PARTNER_START_TAG, 0);
            jDependencyRelation.put(RELATION_PARTNER_END_TAG, 0);
        }

        if (!isDependent.isBoolean()) {
            throw new IllegalStateException("Field 'isDependent' must have a true/false format");
        }

        Span partnerSpan = null;
        String typeString = jDependencyRelation.path(RELATION_PARTNER_TYPE_TAG).getTextValue();

        if (!typeString.equals(ROOT_TAG)) {
            SpanTypeEnum spanType = SpanTypeEnum.valueOf(jDependencyRelation.path(RELATION_PARTNER_TYPE_TAG)
                    .getTextValue());

        NavigableMap<Integer,Token> sectionBorders = new TreeMap<Integer,Token>();
        boolean firstTokenInSentence = true;
        Sentence sentence = null;
        final List<SentimentPhrase> sentimentPhrases = new ArrayList<SentimentPhrase>();
        while(tokenIt.hasNext()){
            Span span = tokenIt.next();
            switch (span.getType()) {
                case Token:
                    Token word = (Token)span;
                    Integer wordIndex = sentimentTokens.size();
                    Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION);
                    boolean addToList = false;
                    Sentiment sentiment = null;
                    if(sentimentAnnotation != null && sentimentAnnotation.value() != null &&
                            !sentimentAnnotation.value().equals(ZERO)){
                        sentiment = new Sentiment(word, sentimentAnnotation.value(),

            Language lang = new Language(language);
            LiteralFactory lf = LiteralFactory.getInstance();
            ci.getLock().writeLock().lock();
            try { //write TextAnnotations for Named Entities
                while(spans.hasNext()){
                    Span span = spans.next();
                    switch (span.getType()) {
                        case Sentence:
                            context = (Sentence)span;
                            break;
                        default:
                            Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
                            if(nerAnno != null){
                                UriRef ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                                //add span related data
                                metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT,
                                    new PlainLiteralImpl(span.getSpan(), lang)));
                                metadata.add(new TripleImpl(ta, ENHANCER_START,
                                    lf.createTypedLiteral(span.getStart())));
                                metadata.add(new TripleImpl(ta, ENHANCER_END,
                                    lf.createTypedLiteral(span.getEnd())));
                                metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT,
                                    new PlainLiteralImpl(context == null ?
                                            getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
                                                context.getSpan(), lang)));
                                //add the NER type
                                if(nerAnno.value().getType() != null){
                                    metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
                                }

            Set<SpanTypeEnum> enclosedSpanTypes, boolean isUnicaseLanguage){
        this.section = section;
        Iterator<Span> enclosed = section.getEnclosed(enclosedSpanTypes);
        List<ChunkData> activeChunks = new ArrayList<ChunkData>();
        while(enclosed.hasNext()){
            Span span = enclosed.next();
            if(span.getStart() >= span.getEnd()){ //safeguard against empty spans
                log.warn("Detected Empty Span {} in section {}: '{}'",
                    new Object[]{span,section, section.getSpan()});
            }
            if(span.getType() == SpanTypeEnum.Chunk){
                ChunkData chunkData = new ChunkData(tpc,(Chunk)span);
                if(chunkData.isProcessable()){
                    activeChunks.add(0, chunkData);
                    chunkData.startToken = tokens.size();
                    if(log.isDebugEnabled()){
                        log.debug(">> Chunk: (type:{}, startPos: {}) text: '{}'",
                            new Object []{
                            chunkData.chunk.getType(),
                            chunkData.startToken,
                            chunkData.chunk.getSpan()
                            });
                    }
                } //else ignore chunks that are not processable
            } else if(span.getType() == SpanTypeEnum.Token){
                TokenData tokenData = new TokenData(tpc,tokens.size(),(Token)span,
                    activeChunks.isEmpty() ? null : activeChunks.get(0));
                if(log.isDebugEnabled()){
                    log.debug("  > {}: {} {}(pos:{}) chunk: '{}'",
                        new Object[]{tokenData.index,tokenData.token,
                            tokenData.morpho != null ? ("(lemma: "+tokenData.morpho.getLemma()+") ") : "",
                            tokenData.token.getAnnotations(POS_ANNOTATION),
                            tokenData.inChunk != null ? tokenData.inChunk.chunk.getSpan() : "none"});
                }
                if(!tokenData.hasAlphaNumeric){
                    tokenData.isLinkable = false;
                    tokenData.isMatchable = false;
                } else {
                    // (1) apply basic rules for linkable/processable tokens
                    //determine if the token should be linked/matched
                    tokenData.isLinkable = tokenData.isLinkablePos != null ? tokenData.isLinkablePos : false;
                    //matchable := linkable OR has matchablePos
                    tokenData.isMatchable = tokenData.isLinkable ||
                            (tokenData.isMatchablePos != null && tokenData.isMatchablePos);
                   
                    //(2) for non linkable tokens check for upper case rules
                    if(!tokenData.isLinkable && tokenData.upperCase &&
                            tokenData.index > 0 && //not a sentence or sub-sentence start
                            !tokens.get(tokenData.index-1).isSubSentenceStart){
                        //We have an upper case token!
                        if(tpc.isLinkUpperCaseTokens()){
                            if(tokenData.isMatchable) { //convert matchable to
                                tokenData.isLinkable = true; //linkable
                                tokenData.isMatchable = true;
                            } else { // and other tokens to
                                tokenData.isMatchable = true; //matchable
                            }
                        } else {
                            //finally we need to convert other Tokens to matchable
                            //if MatchUpperCaseTokens is active
                            if(!tokenData.isMatchable && tpc.isMatchUpperCaseTokens()){
                                tokenData.isMatchable = true;
                            }
                        }
                    } //else not an upper case token
                   
                    //(3) Unknown POS tag Rules (see STANBOL-1049)
                    if(!tokenData.isLinkable && (tokenData.isLinkablePos == null ||
                            tokenData.isMatchablePos == null)){
                        if(isUnicaseLanguage || !tpc.isLinkOnlyUpperCaseTokensWithUnknownPos()){
                            if(tokenData.isLinkablePos == null && tokenData.hasSearchableLength){
                                tokenData.isLinkable = true;
                                tokenData.isMatchable = true;
                            } //else no need to change the state
                        } else { //non unicase language and link only upper case tokens enabled
                            if(tokenData.upperCase && // upper case token
                                    tokenData.index > 0 && //not a sentence or sub-sentence start
                                    !tokens.get(tokenData.index-1).isSubSentenceStart){
                                if(tokenData.hasSearchableLength && tokenData.isLinkablePos == null){
                                    tokenData.isLinkable = true;
                                    tokenData.isMatchable = true;
                                } else if(tokenData.isMatchablePos == null){
                                    tokenData.isMatchable = true;
                                }
                            } else if(tokenData.hasSearchableLength &&  //lower case and long token
                                    tokenData.isMatchablePos == null){
                                tokenData.isMatchable = true;
                            } //else lower case and short word
                        }
                    } //else already linkable or POS tag present
                }
                log.debug("    - {}",tokenData);
                //add the token to the list
                tokens.add(tokenData);
                if(!hasLinkableToken){
                    hasLinkableToken = tokenData.isLinkable;
                }
                Iterator<ChunkData> activeChunkIt = activeChunks.iterator();
                while(activeChunkIt.hasNext()){
                    ChunkData activeChunk = activeChunkIt.next();
                    if (tokenData.isLinkable){
                        //ignore matchableCount in Chunks with linkable Tokens
                        activeChunk.matchableCount = -10; //by setting the count to -10
                    } else if(tokenData.isMatchable){
                        activeChunk.matchableCount++;
                    }
                    if(tokenData.isMatchable){ //for matchable tokens
                        //update the matchable span within the active chunk
                        if(activeChunk.matchableStart < 0){
                            activeChunk.matchableStart = tokenData.index;
                            activeChunk.matchableStartCharIndex = tokenData.token.getStart();
                        }
                        if(activeChunk.matchableStart >= 0){ //if start is set also set end
                            activeChunk.matchableEnd = tokenData.index;
                            activeChunk.matchableEndCharIndex = tokenData.token.getEnd();
                        }
                    }
                    if(span.getEnd() >= activeChunk.getEndChar()){
                        //this is the last token in the current chunk
                        activeChunk.endToken = tokens.size()-1;
                        if(log.isDebugEnabled()){
                          log.debug(" << end Chunk {} '{}' @pos: {}", new Object[]{
                              activeChunk.chunk, activeChunk.chunk.getSpan(),
