Examples of AnnotationSet


Examples of gate.AnnotationSet

        AnnotationParm parm = annSpecs.get(curAnnSpec);
        String typeName = parm.getTypeName();
        Set<String>  featureSet = parm.getFeatureSet();
        String featureName = parm.getFeatureName();
        String constantValue = parm.getConstantValue();
        AnnotationSet tmpSet = null;
        //logger.debug("Checking specification type/feature: "+typeName+"/"+featureName);
        //logger.debug("Annotations in CurAnnSet: "+curOff_Set.size());
        if(typeName.equals("@STRING")) {
          // if we arrive at this spec, set the content to the character
          // at the current position, leave the ann null and advance the
          // offset by one and reset the spec number to 0
          nextAnnotation = null;
          nextSpecNo = curAnnSpec;
          nextContent = theDocument.getContent().toString().substring(curOffset,curOffset+1);
          nextOffset = new Long(curOffset);
          nextSpec = annSpecs.get(nextSpecNo).toString();
          curAnnSpec = 0;
          curOffset++;
          curOff_Set = Utils.getAnnotationsAtOffset(theAnnSet,new Long(curOffset));
          //System.out.println("B: Going to next offset="+curOffset+" found "+curOff_Set.size());
          break;
        }
        // TODO: when is this needed? if curOff_Set is empty, only if one
        // of the next annSpecs is @String, otherwise we will never get
        // something
        if(curOff_Set.size() == 0) {
          //System.out.println("curOff_Set is empty, get for next annspec");
          curAnnSpec++;
          if(curAnnSpec >= annSpecs.size()) {
            curAnnSpec = 0;
            curOffset++;
            curOff_Set = Utils.getAnnotationsAtOffset(theAnnSet,new Long(curOffset));
            //System.out.println("C: Going to next offset="+curOffset+" found "+curOff_Set.size());
          }
          continue;
        }
        if(featureSet == null) {
          tmpSet = curOff_Set.get(typeName);
        } else {
          tmpSet = curOff_Set.get(typeName,featureSet);
        }
        //logger.debug("Size of tmpSet: "+tmpSet.size());
        //System.out.println("After checking for anns, tmpSet has "+tmpSet.size());
        if(!tmpSet.isEmpty()) {
          // get the longest annotation in the set, but only if it fits
          // and ends before toOffset
          Annotation ann = null;
          int maxlength = -1;
          Iterator<Annotation> it = tmpSet.iterator();
          while(it.hasNext()) {
            Annotation tmpAnn = it.next();
            if(Utils.length(tmpAnn) > maxlength &&
               tmpAnn.getEndNode().getOffset().intValue() <= toOffset) {
              maxlength = Utils.length(tmpAnn);
View Full Code Here

Examples of gate.AnnotationSet

      throw new GateRuntimeException("Can only handle DocumentImpl not " +
          document.getClass());
    }
   
    // Get the annotations in document order
    AnnotationSet anns =
      document.getAnnotations(inputSpecificationSet).get(inputSpecificationType);
    List<Annotation> annlist = gate.Utils.inDocumentOrder(anns);
    List<AnnotationSpec> annspecs = new LinkedList<AnnotationSpec>();
    StringBuilder newText = new StringBuilder();
    long curoffset = 0;
    // System.err.println("Processing original annotations: "+anns.size());
    for(Annotation ann : annlist) {
      String txt;
      if(inputSpecificationFeature == null) {
        txt = gate.Utils.stringFor(document, ann);
        newText.append(txt);
        annspecs.add(new AnnotationSpec(ann,curoffset,curoffset+txt.length(),ann.getId()));
        curoffset += txt.length();
        newText.append(actualSeparatorString);
        curoffset += actualSeparatorString.length();
      } else {
        txt = (String)ann.getFeatures().get(inputSpecificationFeature);
        if(txt != null) {
          newText.append(txt);
          annspecs.add(new AnnotationSpec(ann,curoffset,curoffset+txt.length(),ann.getId()));
          curoffset += txt.length();
          newText.append(actualSeparatorString);
          curoffset += actualSeparatorString.length();
        }
      }
    }
   
    FeatureMap theparms = Factory.newFeatureMap();
    theparms.put("collectRepositioningInfo", document.getCollectRepositioningInfo());
    theparms.put("encoding", ((DocumentImpl) document).getEncoding());
    theparms.put("markupAware", document.getMarkupAware());
    theparms.put("mimeType", ((DocumentImpl) document).getMimeType());
    theparms.put("preserveOriginalContent", document.getPreserveOriginalContent());
    theparms.put("stringContent", newText.toString());
    FeatureMap thefeats = Factory.newFeatureMap();
    FeatureMap docfeats = document.getFeatures();
    thefeats.putAll(docfeats);

    String theName = document.getName();
    // create a copy of the current document
    Document newDoc;
    try {
      newDoc = (Document) Factory.createResource(
              "gate.corpora.DocumentImpl",
              theparms,
              thefeats,
              theName+"_virtual");
    } catch (ResourceInstantiationException ex) {
      throw new GateRuntimeException(ex);
    }

    // set the initial annotations in the virtual document
    AnnotationSet newSet = newDoc.getAnnotations(virtualSpecificationSet);
    for(AnnotationSpec annspec : annspecs) {
      FeatureMap fm = Factory.newFeatureMap();
      fm.putAll(annspec.annotation.getFeatures());
      fm.put("orig_id",annspec.origId);     
      try {
        newSet.add(annspec.fromOffset, annspec.toOffset, virtualSpecificationType, fm);
      } catch(InvalidOffsetException ex) {
        throw new GateRuntimeException(
          "Invalid offset when creating annotation for virtual document: from/to/doclength: "+
          annspec.fromOffset+"/"+annspec.toOffset+"/"+newDoc.getContent().size(),ex);
      }
View Full Code Here

Examples of gate.AnnotationSet

            + getDocument().getName());

    TextForSpecIterator it =
            annotatedDocumentTransformer.getIterator(getDocument(),inputAnnotationSetName);

    AnnotationSet os = getDocument().getAnnotations(outputAnnotationSetName);
    while(it.hasNext()) {
      it.next();
      Annotation ann = it.getAnnotation();
      // if the annotation is null, skip to next match. This can happen for
      // a @STRING specification which does not make sense here
      if(ann == null) {
        continue;
      }
      logger.debug("Got annotation: "+ann);
      int specNo = it.getSpecNo();
      String spec = sourceSpecificationsVector.get(it.getSpecNo());
      FeatureMap fm = Factory.newFeatureMap();
      fm.putAll(ann.getFeatures());
      fm.put("annID",ann.getId());
      fm.put("annSet", inputAnnotationSetName);
      String content = it.getContent();
      fm.put("content", content);
      fm.put("specNo",specNo+"");
      fm.put("spec", spec);
      os.add(ann.getStartNode(),ann.getEndNode(),outputAnnotationTypeName,fm);
    }
    fireStatusChanged("AnnotatedBySpecPR completed");

  }
View Full Code Here

Examples of gate.AnnotationSet

      }

      if(directoryFile != null) {
        String out = "";
        if(getSavePreservingFormat()) {
          AnnotationSet as = newDoc.getAnnotations(annotationSetNames.get(0));
          out = newDoc.toXml(as,addFeaturesToPreservingFormat);
        } else {
          out = newDoc.toXml();
        }
        File outFile = new File(directoryFile, theName+copiedDocNameSuffix+".xml");
View Full Code Here

Examples of gate.AnnotationSet

        String annotationSetName = tmp1[0];
        String annotationTypeName = (tmp1.length == 2) ? tmp1[1] : null;
        if(annotationSetName.equals("")) {
          annotationSetName = null;
        }
        AnnotationSet theAnns = virtualDoc.getAnnotations(annotationSetName);
        AnnotationSet targetSet = originalDoc.getAnnotations(annotationSetName);
        if(annotationTypeName != null) {
          theAnns = theAnns.get(annotationTypeName);
        }
        for (Annotation theAnn : theAnns) {
          addMappedAnnotation(targetSet,theAnn,getBackwardOffsetMap());
View Full Code Here

Examples of gate.AnnotationSet

        String annotationSetName = tmp1[0];
        String annotationTypeName = (tmp1.length == 2) ? tmp1[1] : null;
        if(annotationSetName.equals("")) {
          annotationSetName = null;
        }
        AnnotationSet theAnns = originalDoc.getAnnotations(annotationSetName);
        AnnotationSet targetSet = virtualDoc.getAnnotations(annotationSetName);
        if(annotationTypeName != null) {
          theAnns = theAnns.get(annotationTypeName);
        }
        for (Annotation theAnn : theAnns) {
          addMappedAnnotation(targetSet,theAnn,getForwardOffsetMap());
View Full Code Here

Examples of gate.AnnotationSet

      throw new ExecutionException(
        "No document to process!"
      );
    }

    AnnotationSet inputAS = null;
    if(inputAnnotationSet == null ||
       inputAnnotationSet.equals("")) inputAS = theDocument.getAnnotations();
    else inputAS = theDocument.getAnnotations(inputAnnotationSet);

    outputAS = null;
    if(outputAnnotationSet == null ||
       outputAnnotationSet.equals("")) outputAS = theDocument.getAnnotations();
    else outputAS = theDocument.getAnnotations(outputAnnotationSet);


    AnnotationSet processAnns = null;
    if(wordAnnotationType == null || wordAnnotationType.isEmpty()) {
      throw new GateRuntimeException("Word annotation type must not be empty!");
    }
   
    if(spaceAnnotationType == null || spaceAnnotationType.isEmpty()) {
      throw new GateRuntimeException("Space annotation type must not be empty!");
    }
    Set<String> typeSet = new HashSet<String>();
    typeSet.add(wordAnnotationType);
    typeSet.add(spaceAnnotationType);
    processAnns = inputAS.get(typeSet);
   
    AnnotationSet containingAnns = null;
    if(containingAnnotationType == null || containingAnnotationType.isEmpty()) {
      // leave the containingAnns null to indicate we do not use containing annotations
    } else {
      containingAnns = inputAS.get(containingAnnotationType);
      //System.out.println("DEBUG: got containing annots: "+containingAnns.size()+" type is "+containingAnnotationType);
    }
   
    AnnotationSet splitAnns = null;
    if(splitAnnotationType == null || splitAnnotationType.isEmpty()) {
      // leave the splitAnns null to indicate we do not use split annotations
    } else {
      splitAnns = inputAS.get(splitAnnotationType);
      //System.out.println("DEBUG: got split annots: "+splitAnns.size()+" type is "+splitAnnotationType);
      if(splitAnns.size() == 0) {
        splitAnns = null;
      }
    }
   
   
    fireStatusChanged("Performing look-up in " + theDocument.getName() + "...");

    long endOffset = theDocument.getContent().size();

    // now split the document into chunks if necessary:
    // = for each containing annotation we create a chunk,
    // = each split annotation forces the end of a chunk
    // Each chunk is represented by an instance of Chunk
    if(containingAnns == null) {
      if(splitAnns != null) { // we need to do some additional chunking
        List<Annotation> splitAnnsList = Utils.inDocumentOrder(splitAnns);
        long lastOffset = 0;
        for(Annotation splitAnn : splitAnnsList) {
          long splitOffset = splitAnn.getStartNode().getOffset();
          if(splitOffset > lastOffset) {
            doAnnotateChunk(Chunk.makeChunk(
                document,lastOffset,splitOffset,!caseSensitive,
                processAnns,wordAnnotationType,textFeature,spaceAnnotationType,
                matchAtWordStartOnly,matchAtWordEndOnly,matchStartFeature,matchEndFeature,matchTypeFeature));
          }
          lastOffset = splitOffset;
        } // for
        // anything left?
        if(lastOffset < endOffset) {
          doAnnotateChunk(Chunk.makeChunk(document,lastOffset,endOffset,!caseSensitive,
              processAnns,wordAnnotationType,textFeature,spaceAnnotationType,
              matchAtWordStartOnly,matchAtWordEndOnly,matchStartFeature,matchEndFeature,matchTypeFeature));
        }
      } else {
        // create a chunk from the whole document
        doAnnotateChunk(Chunk.makeChunk(document,0,endOffset,!caseSensitive,
            processAnns,wordAnnotationType,textFeature,spaceAnnotationType,
            matchAtWordStartOnly,matchAtWordEndOnly,matchStartFeature,matchEndFeature,matchTypeFeature));
      }
    } else {
      for(Annotation containingAnn : containingAnns) {
        //System.out.println("processing containing annot "+containingAnn);
        // if we do have split annotations and we have split annotations within the range
        // of this containing annotation, we need to do further chunking
        if(splitAnns != null) {
          AnnotationSet containedSplits = Utils.getContainedAnnotations(splitAnns, containingAnn);
          if(containedSplits.size() > 0) {
            // we need to split
           
           
            List<Annotation> splitAnnsList = Utils.inDocumentOrder(containedSplits);
            long lastOffset = containingAnn.getStartNode().getOffset();
View Full Code Here

Examples of gate.AnnotationSet

      throw new ExecutionException(
        "No document to process!"
      );
    }

    AnnotationSet inputAS = null;
    if(inputAnnotationSet == null ||
       inputAnnotationSet.equals("")) inputAS = theDocument.getAnnotations();
    else inputAS = theDocument.getAnnotations(inputAnnotationSet);

    AnnotationSet processAnns = null;
    if(wordAnnotationType == null || wordAnnotationType.isEmpty()) {
      throw new GateRuntimeException("Word annotation type must not be empty!");
    }
    processAnns = inputAS.get(wordAnnotationType);
   
    AnnotationSet containingAnns = null;
    if(containingAnnotationType == null || containingAnnotationType.isEmpty()) {
      // leave the containingAnns null to indicate we do not use containing annotations
    } else {
      containingAnns = inputAS.get(containingAnnotationType);
      //System.out.println("DEBUG: got containing annots: "+containingAnns.size()+" type is "+containingAnnotationType);
    }
   
    AnnotationSet outputAS = document.getAnnotations(outputAnnotationSet);
   
    fireStatusChanged("Performing look-up in " + theDocument.getName() + "...");

    if(containingAnns == null) {
      // go through all word annotations
      for(Annotation ann : processAnns) {
        Iterator<Lookup> ret = doMatch(featureAsString(ann,textFeature),matchAtStartOnly,matchAtEndOnly);
        if(ret != null) {
          processMatch(ann,ret, inputAS, outputAS);
        } else {
          processNonMatch(ann,ret,inputAS,outputAS);
        }
      }
    } else {
      for(Annotation containingAnn : containingAnns) {
        AnnotationSet containedAnns = Utils.getContainedAnnotations(processAnns, containingAnn);
        for(Annotation ann : containedAnns) {
          Iterator<Lookup> ret = doMatch(featureAsString(ann,textFeature),matchAtStartOnly,matchAtEndOnly);
          if(ret != null) {
            processMatch(ann,ret, inputAS, outputAS);
          } else {
View Full Code Here

Examples of gate.AnnotationSet

    chunk.from = (int)fromOffset;
    chunk.to = (int)toOffset;
    chunk.text = new char[chunk.initialLength];
    chunk.endOffsets = new int[chunk.initialLength];
    chunk.startOffsets = new int[chunk.initialLength];
    AnnotationSet actualAnns = processAnns.get(fromOffset,toOffset);
    AnnotationSet wordAnns = actualAnns.get(wordAnnotationType);
    if(wordAnns.isEmpty()) {
      chunk.length = 0;
      return chunk;
    }
    List<Annotation> actualAnnsList = Utils.inDocumentOrder(actualAnns);
    int i = 0; // index into the text, startOffsets and endOffsets arrays
View Full Code Here

Examples of gate.AnnotationSet

    parms = Factory.newFeatureMap();
    File docFile = new File(testingDir,"extgaz2docprep.xml");
    parms.put("sourceUrl",docFile.toURI().toURL());
    Document doc = (Document)
         Factory.createResource("gate.corpora.DocumentImpl", parms);
    AnnotationSet lookups = doc.getAnnotations().get("OutType");
    assertEquals(0,lookups.size());
    // run the gazetteer on the document
    eg.setDocument(doc);
    eg.execute();
    // check if we got the correct annotations
    AnnotationSet tokens = doc.getAnnotations().get("Token");
    assertEquals(46,tokens.size());
    AnnotationSet sentences = doc.getAnnotations().get("Sentence");
    assertEquals(4,sentences.size());
    lookups = doc.getAnnotations().get("OutType");
    if(backendNr == 3) {
      assertEquals(12,lookups.size());
    } else {
      assertEquals(14,lookups.size());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact: coftware#gmail.com.