Package edu.stanford.nlp.pipeline

Examples of edu.stanford.nlp.pipeline.CoreMapAggregator


  public static List<CoreMap> findAndMergeNumbers(CoreMap annotationRaw){
    //copy annotation to preserve its integrity
    CoreMap annotation = new ArrayCoreMap(annotationRaw);
    // Find and label numbers
    List<CoreMap> numbers = NumberNormalizer.findNumbers(annotation);
    CoreMapAggregator numberAggregator = CoreMapAggregator.getAggregator(CoreMapAttributeAggregator.DEFAULT_NUMERIC_AGGREGATORS, CoreAnnotations.TokensAnnotation.class);

    // We are going to mark the token begin and token end for each token so we can more easily deal with
    // ensuring correct token offsets for merging
    //get sentence offset
    Integer startTokenOffset = annotation.get(CoreAnnotations.TokenBeginAnnotation.class);
    if (startTokenOffset == null) {
      startTokenOffset = 0;
    }
    //set token offsets
    int i = 0;
    List<Integer> savedTokenBegins = new LinkedList<Integer>();
    List<Integer> savedTokenEnds = new LinkedList<Integer>();
    for (CoreMap c:annotation.get(CoreAnnotations.TokensAnnotation.class)) {
      //set token begin
      if( (i==0 && c.get(CoreAnnotations.TokenBeginAnnotation.class) != null) || (i > 0 && !savedTokenBegins.isEmpty()) ){
        savedTokenBegins.add(c.get(CoreAnnotations.TokenBeginAnnotation.class));
      }
      c.set(CoreAnnotations.TokenBeginAnnotation.class, i+startTokenOffset);
      i++;
      //set token end
      if( (i==1 && c.get(CoreAnnotations.TokenEndAnnotation.class) != null) || (i > 1 && !savedTokenEnds.isEmpty()) ){
        savedTokenEnds.add(c.get(CoreAnnotations.TokenEndAnnotation.class));
      }
      c.set(CoreAnnotations.TokenEndAnnotation.class, i+startTokenOffset);
    }
    //merge numbers
    final Integer startTokenOffsetFinal = startTokenOffset;
    List<CoreMap> mergedNumbers = numberAggregator.merge(annotation.get(CoreAnnotations.TokensAnnotation.class), numbers,
        in -> Interval.toInterval(
              in.get(CoreAnnotations.TokenBeginAnnotation.class) - startTokenOffsetFinal,
              in.get(CoreAnnotations.TokenEndAnnotation.class) - startTokenOffsetFinal)
    );
    //restore token offsets
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.pipeline.CoreMapAggregator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.