Package gate

Examples of gate.AnnotationSet


  private String dictionaryEncoding = "UTF-8";

  @Override
  public void execute() throws ExecutionException {
    // text doc annotations
    AnnotationSet annotations;
    if (annotationSetName != null && annotationSetName.length() > 0)
      annotations = document.getAnnotations(annotationSetName);
    else
      annotations = document.getAnnotations();

    // get the document text
    String text = document.getContent().toString();

    // get sentence annotations
    AnnotationSet sentences = annotations.get("Sentence");

    // order sentences

    List<Annotation> sentList = new LinkedList<Annotation>();

    for (Iterator iterator = sentences.iterator(); iterator.hasNext();) {
      sentList.add((Annotation) iterator.next());

    }

    java.util.Collections.sort(sentList, new gate.util.OffsetComparator());

    // for each sentence get token annotations
    for (Iterator iterator = sentList.iterator(); iterator.hasNext();) {
      Annotation annotation = (Annotation) iterator.next();

      AnnotationSet sentenceTokens = annotations.get("Token", annotation
          .getStartNode().getOffset(), annotation.getEndNode()
          .getOffset());

      // create a list

      List<Annotation> tokenList = new LinkedList<Annotation>();

      for (Iterator iterator2 = sentenceTokens.iterator(); iterator2
          .hasNext();) {
        tokenList.add((Annotation) iterator2.next());

      }
View Full Code Here


  private List<NameFinder> finder;

  @Override
  public void execute() throws ExecutionException {
    // text doc annotations
    AnnotationSet annotations;
    if (inputASName != null && inputASName.length() > 0)
      annotations = document.getAnnotations(inputASName);
    else
      annotations = document.getAnnotations();

    AnnotationSet outputAnnots;
    if (outputASName != null && outputASName.length() > 0)
      outputAnnots = document.getAnnotations(outputASName);
    else
      outputAnnots = document.getAnnotations();
View Full Code Here

  URL model;

  @Override
  public void execute() throws ExecutionException {
    // text doc annotations
    AnnotationSet annotations;
    if (annotationSetName != null && annotationSetName.length() > 0)
      annotations = document.getAnnotations(annotationSetName);
    else
      annotations = document.getAnnotations();

    // get sentence annotations
    //AnnotationSet sentences = document.getAnnotations("Sentence");

    // get the document text
    String text = document.getContent().toString();
    // run tokenizer
    Span[] spans = tokenizer.tokenizePos(text);
    // compare the resulting
    // spans and add annotations

    for (int i = 0; i < spans.length; i++) {

      FeatureMap fm = Factory.newFeatureMap();
      // source of the annotation, plus the covered string
      fm.put("source", "openNLP");
      fm.put("string", text.substring(spans[i].getStart(), spans[i]
          .getEnd()));
      // type (currently disabled)
//      fm.put("type", "urn:lsid:ontotext.com:kim:iextraction:Token");

      try {
        annotations.add(Long.valueOf(spans[i].getStart()), Long
            .valueOf(spans[i].getEnd()), "Token", fm);

      } catch (InvalidOffsetException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
View Full Code Here

  @Override
  public void execute() throws ExecutionException {
    boolean isSentenceSplitted = false;
    // text doc annotations
    AnnotationSet annotations;
    if (annotationSetName != null && annotationSetName.length() > 0)
      annotations = document.getAnnotations(annotationSetName);
    else
      annotations = document.getAnnotations();
    // get the document text
    String text = document.getContent().toString();
    // run sentence splitter
    int[] spans = splitter.sentPosDetect(text);
    // compare the resulting
    // sentences and add annotations
    int prevSpan = 0;
    for (int i = 0; i < spans.length; i++) {

      FeatureMap fm = Factory.newFeatureMap();
      // source of the annotation
      fm.put("source", "openNLP");
      // type (currently disabled)
      // fm.put("type", "urn:lsid:ontotext.com:kim:iextraction:Sentence");

      try {
        // annotations.add(Long.valueOf(spans[i].getStart()),
        // Long.valueOf(spans[i].getEnd()), "Sentence", fm);
        // annotations.add(i == 0 ? Long.valueOf(prevSpan) : Long
        // .valueOf(prevSpan + countSpaces(prevSpan - 1)),
        // i == (spans.length - 1) ? Long.valueOf(spans[i]) : Long
        // .valueOf(spans[i] - 1), "Sentence", fm);
        int start = prevSpan;
        int end = spans[i];

        // remove leading spaces of a sentence
        for (int j = start; j < end
            && Character.isWhitespace(text.charAt(j)); j++) {
          start = j + 1;
        }

        // remove trailing spaces of a sentence
        if (end > 1) {
          for (int j = end; j > start
              && Character.isWhitespace(text.charAt(j - 1)); j--) {
            end = j - 1;
          }
        }

        annotations.add(Long.valueOf(start), Long.valueOf(end),
            "Sentence", fm);
        if(!isSentenceSplitted)
          isSentenceSplitted = true;

      } catch (InvalidOffsetException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
      }

      prevSpan = spans[i];
    }
    if(!isSentenceSplitted){
      FeatureMap fm = Factory.newFeatureMap();
      // source of the annotation
      fm.put("source", "openNLP");
      try {
        annotations.add(new Long(0), new Long(text.length()),
            "Sentence", fm);
      } catch (InvalidOffsetException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
View Full Code Here

  @SuppressWarnings("unchecked")
  @Override
  public void execute() throws ExecutionException {
    // text doc annotations
    AnnotationSet annotations;
    if (annotationSetName != null && annotationSetName.length() > 0) {
      annotations = document.getAnnotations(annotationSetName);
    } else {
      annotations = document.getAnnotations();
    }

    // get the document text (currently disabled)
    // String text = document.getContent().toString();

    // get token and sentence annotations
    AnnotationSet sentences = annotations.get("Sentence");
    AnnotationSet tokensAS = annotations.get("Token");

    if (sentences != null && sentences.size() > 0 && tokensAS != null
        && tokensAS.size() > 0) {

      // order them
      List<Annotation> sentList = new LinkedList<Annotation>();

      for (Iterator iterator = sentences.iterator(); iterator.hasNext();) {
        sentList.add((Annotation) iterator.next());

      }

      java.util.Collections.sort(sentList,
          new gate.util.OffsetComparator());

      // for each sentence get token annotations
      for (Iterator iterator = sentList.iterator(); iterator.hasNext();) {
        Annotation annotation = (Annotation) iterator.next();

        AnnotationSet sentenceTokens = annotations.get("Token",
            annotation.getStartNode().getOffset(), annotation
                .getEndNode().getOffset());

        // create a list

        List<Annotation> annList = new LinkedList<Annotation>();

        for (Iterator<Annotation> iterator2 = sentenceTokens.iterator(); iterator2
            .hasNext();) {
          annList.add(iterator2.next());

        }

        // order on offset
        Collections.sort(annList, new gate.util.OffsetComparator());

        // make the array be string[] sentence
        String[] tokens = new String[sentenceTokens.size()];
        String[] postags = new String[sentenceTokens.size()];
        int i = 0;
        for (Iterator iterator3 = annList.iterator(); iterator3
            .hasNext();) {

          Annotation token = (Annotation) iterator3.next();
View Full Code Here

    String locType = "location";
    String orgType = "org";
    String perType = "person";

    // extract sentences from text
    AnnotationSet sentences = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    OffsetComparator oc = new OffsetComparator();
    SparseVector[] x;
    Object[] y;
    ElementSequence<Element<Object>> sequence;
    Element<Object> element;
    ArrayList<Annotation> tokens, dToks, lToks, oToks, pToks;
    AnnotationSet dats, locs, orgs, pers;
    String chunk, label;

    for (Annotation sentence : sentences) {

      //extract NEs from sentence
View Full Code Here

    pipeline.execute();
    Factory.deleteResource(corpus);
    Factory.deleteResource(pipeline);

    // extract sentences from text
    AnnotationSet sentences = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    OffsetComparator oc = new OffsetComparator();
    SparseVector[] x;
    Object[] y;
    ElementSequence<Element<Object>> sequence;
View Full Code Here

    pipeline.execute();
    Factory.deleteResource(corpus);
    Factory.deleteResource(pipeline);

    // extract sentences from text
    AnnotationSet sentences = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    Alphabet labelAlphabet = tagger.getYAlphabet();
    OffsetComparator oc = new OffsetComparator();
    SparseVector[] x;
    Object[] y;
View Full Code Here

    //specify noun and verb chunk types
    String nChunkType = "nchunk";
    String vChunkType = "vchunk";

    // extract sentences from text
    AnnotationSet sentences = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    OffsetComparator oc = new OffsetComparator();
    SparseVector[] x;
    Object[] y;
    ElementSequence<Element<Object>> sequence;
    Element<Object> element;
    ArrayList<Annotation> tokens, nToks, vToks;
    AnnotationSet nChunks, vChunks;
    String chunk, label;

    for (Annotation sentence : sentences) {

      //extract noun and verb chunks from sentence
View Full Code Here

  @SuppressWarnings("unchecked")
  public void extractData(String sequenceType, String elementType, String labelType)
  throws ExecutionException, InvalidOffsetException, ResourceInstantiationException {

    //extract tokens from text
    AnnotationSet tokens = inputAS.get(sequenceType);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    ElementSequence<Element<Object>> sequence;
    SparseVector[] x;
    Object[] y;
    char[] chars;
View Full Code Here

TOP

Related Classes of gate.AnnotationSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.