Examples of gate.AnnotationSet

gate.AnnotationSet

  private String dictionaryEncoding = "UTF-8";


  @Override
  public void execute() throws ExecutionException {
    // text doc annotations
    AnnotationSet annotations;
    if (annotationSetName != null && annotationSetName.length() > 0)
      annotations = document.getAnnotations(annotationSetName);
    else
      annotations = document.getAnnotations();


    // getdoc.get text
    String text = document.getContent().toString();


    // get sentence annotations
    AnnotationSet sentences = annotations.get("Sentence");


    // order sentences


    List<Annotation> sentList = new LinkedList<Annotation>();


    for (Iterator iterator = sentences.iterator(); iterator.hasNext();) {
      sentList.add((Annotation) iterator.next());


    }


    java.util.Collections.sort(sentList, new gate.util.OffsetComparator());


    // for each sentence get token annotations
    for (Iterator iterator = sentList.iterator(); iterator.hasNext();) {
      Annotation annotation = (Annotation) iterator.next();


      AnnotationSet sentenceTokens = annotations.get("Token", annotation
          .getStartNode().getOffset(), annotation.getEndNode()
          .getOffset());


      // create a list


      List<Annotation> tokenList = new LinkedList<Annotation>();


      for (Iterator iterator2 = sentenceTokens.iterator(); iterator2
          .hasNext();) {
        tokenList.add((Annotation) iterator2.next());


      }

View Full Code Here

  private List<NameFinder> finder;


  @Override
  public void execute() throws ExecutionException {
    // text doc annotations
    AnnotationSet annotations;
    if (inputASName != null && inputASName.length() > 0)
      annotations = document.getAnnotations(inputASName);
    else
      annotations = document.getAnnotations();


    AnnotationSet outputAnnots;
    if (outputASName != null && outputASName.length() > 0)
      outputAnnots = document.getAnnotations(outputASName);
    else
      outputAnnots = document.getAnnotations();

View Full Code Here

  URL model;


  @Override
  public void execute() throws ExecutionException {
    // text doc annotations
    AnnotationSet annotations;
    if (annotationSetName != null && annotationSetName.length() > 0)
      annotations = document.getAnnotations(annotationSetName);
    else
      annotations = document.getAnnotations();


    // get sentence annotations
    //AnnotationSet sentences = document.getAnnotations("Sentence");


    // getdoc.get text
    String text = document.getContent().toString();
    // run tokenizer
    Span[] spans = tokenizer.tokenizePos(text);
    // compare the resulting
    // spans and add annotations


    for (int i = 0; i < spans.length; i++) {


      FeatureMap fm = Factory.newFeatureMap();
      // type
      fm.put("source", "openNLP");
      fm.put("string", text.substring(spans[i].getStart(), spans[i]
          .getEnd()));
      // source
//      fm.put("type", "urn:lsid:ontotext.com:kim:iextraction:Token");


      try {
        annotations.add(Long.valueOf(spans[i].getStart()), Long
            .valueOf(spans[i].getEnd()), "Token", fm);


      } catch (InvalidOffsetException e) {
        e.printStackTrace();
        throw new RuntimeException(e);

View Full Code Here


  @Override
  public void execute() throws ExecutionException {
    boolean isSentenceSplitted = false;
    // text doc annotations
    AnnotationSet annotations;
    if (annotationSetName != null && annotationSetName.length() > 0)
      annotations = document.getAnnotations(annotationSetName);
    else
      annotations = document.getAnnotations();
    // getdoc.get text
    String text = document.getContent().toString();
    // run tokenizer
    int[] spans = splitter.sentPosDetect(text);
    // compare the resulting
    // sentences and add annotations
    int prevSpan = 0;
    for (int i = 0; i < spans.length; i++) {


      FeatureMap fm = Factory.newFeatureMap();
      // type
      fm.put("source", "openNLP");
      // source
      // fm.put("type", "urn:lsid:ontotext.com:kim:iextraction:Sentence");


      try {
        // annotations.add(Long.valueOf(spans[i].getStart()),
        // Long.valueOf(spans[i].getEnd()), "Sentence", fm);
        // annotations.add(i == 0 ? Long.valueOf(prevSpan) : Long
        // .valueOf(prevSpan + countSpaces(prevSpan - 1)),
        // i == (spans.length - 1) ? Long.valueOf(spans[i]) : Long
        // .valueOf(spans[i] - 1), "Sentence", fm);
        int start = prevSpan;
        int end = spans[i];


        // remove leading spaces of a sentence
        for (int j = start; j < end
            && Character.isWhitespace(text.charAt(j)); j++) {
          start = j + 1;
        }


        // remove trailing spaces of a sentence
        if (end > 1) {
          for (int j = end; j > start
              && Character.isWhitespace(text.charAt(j - 1)); j--) {
            end = j - 1;
          }
        }


        annotations.add(Long.valueOf(start), Long.valueOf(end),
            "Sentence", fm);
        if(!isSentenceSplitted)
          isSentenceSplitted = true;


      } catch (InvalidOffsetException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
      }


      prevSpan = spans[i];
    }
    if(!isSentenceSplitted){
      FeatureMap fm = Factory.newFeatureMap();
      // type
      fm.put("source", "openNLP");
      try {
        annotations.add(new Long(0), new Long(text.length()),
            "Sentence", fm);
      } catch (InvalidOffsetException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }

View Full Code Here


  @SuppressWarnings("unchecked")
  @Override
  public void execute() throws ExecutionException {
    // text doc annotations
    AnnotationSet annotations;
    if (annotationSetName != null && annotationSetName.length() > 0) {
      annotations = document.getAnnotations(annotationSetName);
    } else {
      annotations = document.getAnnotations();
    }


    // getdoc.get text
    // String text = document.getContent().toString();


    // get token and sentence annotations
    AnnotationSet sentences = annotations.get("Sentence");
    AnnotationSet tokensAS = annotations.get("Token");


    if (sentences != null && sentences.size() > 0 && tokensAS != null
        && tokensAS.size() > 0) {


      // order them
      List<Annotation> sentList = new LinkedList<Annotation>();


      for (Iterator iterator = sentences.iterator(); iterator.hasNext();) {
        sentList.add((Annotation) iterator.next());


      }


      java.util.Collections.sort(sentList,
          new gate.util.OffsetComparator());


      // for each sentence get token annotations
      for (Iterator iterator = sentList.iterator(); iterator.hasNext();) {
        Annotation annotation = (Annotation) iterator.next();


        AnnotationSet sentenceTokens = annotations.get("Token",
            annotation.getStartNode().getOffset(), annotation
                .getEndNode().getOffset());


        // create a list


        List<Annotation> annList = new LinkedList<Annotation>();


        for (Iterator<Annotation> iterator2 = sentenceTokens.iterator(); iterator2
            .hasNext();) {
          annList.add(iterator2.next());


        }


        // order on offset
        Collections.sort(annList, new gate.util.OffsetComparator());


        // make the array be string[] sentence
        String[] tokens = new String[sentenceTokens.size()];
        String[] postags = new String[sentenceTokens.size()];
        int i = 0;
        for (Iterator iterator3 = annList.iterator(); iterator3
            .hasNext();) {


          Annotation token = (Annotation) iterator3.next();

View Full Code Here

    String locType = "location";
    String orgType = "org";
    String perType = "person";


    // extract sentences from text
    AnnotationSet sentences = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    OffsetComparator oc = new OffsetComparator();
    SparseVector[] x;
    Object[] y;
    ElementSequence<Element<Object>> sequence;
    Element<Object> element;
    ArrayList<Annotation> tokens, dToks, lToks, oToks, pToks;
    AnnotationSet dats, locs, orgs, pers;
    String chunk, label;


    for (Annotation sentence : sentences) {


      //extract NEs from sentence

View Full Code Here

    pipeline.execute();
    Factory.deleteResource(corpus);
    Factory.deleteResource(pipeline);


    // extract sentences from text
    AnnotationSet sentences = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    OffsetComparator oc = new OffsetComparator();
    SparseVector[] x;
    Object[] y;
    ElementSequence<Element<Object>> sequence;

View Full Code Here

    pipeline.execute();
    Factory.deleteResource(corpus);
    Factory.deleteResource(pipeline);


    // extract sentences from text
    AnnotationSet sentences = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    Alphabet labelAlphabet = tagger.getYAlphabet();
    OffsetComparator oc = new OffsetComparator();
    SparseVector[] x;
    Object[] y;

View Full Code Here

    //specify noun and verb chunk types
    String nChunkType = "nchunk";
    String vChunkType = "vchunk";


    // extract sentences from text
    AnnotationSet sentences = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    OffsetComparator oc = new OffsetComparator();
    SparseVector[] x;
    Object[] y;
    ElementSequence<Element<Object>> sequence;
    Element<Object> element;
    ArrayList<Annotation> tokens, nToks, vToks;
    AnnotationSet nChunks, vChunks;
    String chunk, label;


    for (Annotation sentence : sentences) {


      //extract noun and verb chunks from sentence

View Full Code Here

  @SuppressWarnings("unchecked")
  public void extractData(String sequenceType, String elementType, String labelType)
  throws ExecutionException, InvalidOffsetException, ResourceInstantiationException {


    //extract tokens from text
    AnnotationSet tokens = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    ElementSequence<Element<Object>> sequence;
    SparseVector[] x;
    Object[] y;
    char[] chars;

View Full Code Here

0 1 2 3 4

TOP

Related Classes of gate.AnnotationSet

at.ofai.gate.japeutils.JapeUtils

at.ofai.gate.japeutils.ops.NotCoextensive

at.ofai.gate.japeutils.ops.NotEndsAt

at.ofai.gate.japeutils.ops.NotOverlaps

at.ofai.gate.japeutils.ops.NotStartsAt

at.ofai.gate.virtualdocuments.AnnotateBySpecPR

at.ofai.gate.virtualdocuments.AnnotatedDocumentTransformer

at.ofai.gate.virtualdocuments.CopyVirtualDocumentPR

at.ofai.gate.virtualdocuments.ExportContainedAnnotationsPR

at.ofai.gate.virtualdocuments.FeatureLanguageAnalyserPR

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.