Package opennlp.tools.tokenize

Examples of opennlp.tools.tokenize.Tokenizer.tokenizePos()
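
Tokenizer.tokenizePos(String) tokenizes a string and returns the result as an array of opennlp.tools.util.Span objects: the character offsets (start inclusive, end exclusive) of each token in the input, rather than the token strings that tokenize(String) returns. The token text can be recovered with Span.spansToStrings(), and the offsets make it easy to map the output of downstream components (name finders, POS taggers, chunkers) back to positions in the original text, which is what most of the snippets below do.

A minimal, self-contained sketch of typical usage (the model path "en-token.bin" and the example sentence are placeholders, not taken from any of the projects below):

    import java.io.FileInputStream;
    import java.io.InputStream;

    import opennlp.tools.tokenize.Tokenizer;
    import opennlp.tools.tokenize.TokenizerME;
    import opennlp.tools.tokenize.TokenizerModel;
    import opennlp.tools.util.Span;

    public class TokenizePosExample {
        public static void main(String[] args) throws Exception {
            // Load a tokenizer model from disk (path is a placeholder).
            try (InputStream modelIn = new FileInputStream("en-token.bin")) {
                Tokenizer tokenizer = new TokenizerME(new TokenizerModel(modelIn));
                String sentence = "OpenNLP reports character offsets, not just token strings.";

                Span[] tokenSpans = tokenizer.tokenizePos(sentence);         // one Span per token
                String[] tokens = Span.spansToStrings(tokenSpans, sentence); // token text from offsets

                for (int i = 0; i < tokenSpans.length; i++) {
                    System.out.println(tokens[i] + " [" + tokenSpans[i].getStart()
                            + "," + tokenSpans[i].getEnd() + ")");
                }
            }
        }
    }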


    </calloutlist>*/
    //<end id="ne-setup"/>

    //<start id="ne-display2"/>
    for (int si = 0; si < sentences.length; si++) { //<co id="co.opennlp.name.eachsent2"/>
      Span[] tokenSpans = tokenizer.tokenizePos(sentences[si]); //<co id="co.opennlp.name.tokenizepos"/>
      String[] tokens = Span.spansToStrings(tokenSpans, sentences[si]); //<co id="co.opennlp.name.convert2strings"/>
      Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames4"/>

      for (int ni = 0; ni < names.length; ni++) {
        Span startSpan = tokenSpans[names[ni].getStart()]; //<co id="co.opennlp.name.computestart"/>
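The snippet above breaks off right after looking up the span of the name's first token. The usual continuation, which turns a name Span expressed in token indices (end exclusive) into character offsets within the sentence, looks roughly like this (a sketch reusing the variable names above, not the code behind the original listing):

      for (int ni = 0; ni < names.length; ni++) {
        Span startSpan = tokenSpans[names[ni].getStart()];   // first token of the name
        Span endSpan = tokenSpans[names[ni].getEnd() - 1];   // last token (getEnd() is exclusive)
        int charStart = startSpan.getStart();                // offset into sentences[si]
        int charEnd = endSpan.getEnd();
        String name = sentences[si].substring(charStart, charEnd);
      }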


       
        //for all sentences (or the whole Text - if no sentences available)
        while(sections.hasNext()){
            Section section = sections.next();
            //Tokenize section
            opennlp.tools.util.Span[] tokenSpans = tokenizer.tokenizePos(section.getSpan());
            for(int i=0;i<tokenSpans.length;i++){
                Token token = section.addToken(tokenSpans[i].getStart(), tokenSpans[i].getEnd());
                log.trace(" > add {}",token);
            }
        }

            String sentence = sentences[i];
            //LOG.debug("Sentence: " + sentence);

            // extract the names in the current sentence
            String[] tokens = tokenizer.tokenize(sentence);
            Span[] tokenspan = tokenizer.tokenizePos(sentence);
            Span[] nameSpans = finder.find(tokens);
            double[] probs = finder.probs();

            if (nameSpans != null && nameSpans.length > 0) {
                //System.out.println("Tokens: " +(new ArrayList(Arrays.asList(tokens))).toString());

            }
            String context = StringUtils.join(contextElements, " ");

            // extract the names in the current sentence and
            // store them with the current context
            Span[] tokenSpans = tokenizer.tokenizePos(sentence);
            String[] tokens = Span.spansToStrings(tokenSpans, sentence);
            Span[] nameSpans = finder.find(tokens);
            double[] probs = finder.probs();
            //int lastStartPosition = 0;
            for (int j = 0; j < nameSpans.length; j++) {

       

            //add dots for multiple line breaks
            text = text.replaceAll("\\n\\n", ".\n");
            Span[] sentenceSpans = sentenceDetector.sentPosDetect(text);
            for (int i = 0; i < sentenceSpans.length; i++) {
                String sentence = sentenceSpans[i].getCoveredText(text).toString();
                Span[] tokenSpans = tokenizer.tokenizePos(sentence);
                String[] tokens = getTokensForSpans(sentence, tokenSpans);
                String[] pos;
                double[] posProbs;
                if(posTagger != null){
                    pos = posTagger.tag(tokens);
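Because tokenizePos() is called on each sentence string, the returned token offsets are relative to that sentence. To locate a token in the original text, add the sentence's own start offset, roughly as in this sketch (assuming sentenceDetector and tokenizer are set up as in the snippet above):

        Span[] sentenceSpans = sentenceDetector.sentPosDetect(text);
        for (Span sentenceSpan : sentenceSpans) {
            String sentence = sentenceSpan.getCoveredText(text).toString();
            Span[] tokenSpans = tokenizer.tokenizePos(sentence); // offsets relative to the sentence
            for (Span tokenSpan : tokenSpans) {
                int absStart = sentenceSpan.getStart() + tokenSpan.getStart();
                int absEnd = sentenceSpan.getStart() + tokenSpan.getEnd();
                String token = text.substring(absStart, absEnd); // same text as tokenSpan.getCoveredText(sentence)
            }
        }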

                    tokenSpans,tokens, //the tokens
                    pos,posProbs, // the pos tags (might be null)
                    chunkSpans,chunkProps); //the chunks (might be null)
            }
        } else {
            Span[] tokenSpans = tokenizer.tokenizePos(text);
            String[] tokens = getTokensForSpans(text, tokenSpans);
            enhance(suggestionCache,site,ci,language,0,text,tokenSpans,tokens,
                null,null,null,null);
        }
        //finally write the entity enhancements

            }
            String context = StringUtils.join(contextElements, " ");

            // extract the names in the current sentence and
            // store them with the current context
            Span[] tokenSpans = tokenizer.tokenizePos(sentence);
            String[] tokens = Span.spansToStrings(tokenSpans, sentence);
            Span[] nameSpans = finder.find(tokens);
            double[] probs = finder.probs();
            String[] names = Span.spansToStrings(nameSpans, tokens);
            //int lastStartPosition = 0;

    private List<Token> tokenize(Section section, String language) {
        Tokenizer tokenizer = getTokenizer(language);
        String text = section.getSpan();
        List<Token> tokens = new ArrayList<Token>(text.length()/5); //assume an average token length of 5 characters
        opennlp.tools.util.Span[] tokenSpans = tokenizer.tokenizePos(text);
        for(int i=0;i<tokenSpans.length;i++){
            Token token = section.addToken(tokenSpans[i].getStart(), tokenSpans[i].getEnd());
            log.trace(" > add {}",token);
            tokens.add(token);
        }
