Examples of AceDocument

   */
  private List<CoreMap> readDocument(String prefix, Annotation corpus) throws IOException, SAXException,
      ParserConfigurationException {
    logger.info("Reading document: " + prefix);
    List<CoreMap> results = new ArrayList<CoreMap>();
    AceDocument aceDocument;
    if(aceVersion.equals("ACE2004")){
      aceDocument = AceDocument.parseDocument(prefix, false, aceVersion);
    } else {
      aceDocument = AceDocument.parseDocument(prefix, false);
    }
    String docId = aceDocument.getId();


    // map entity mention ID strings to their EntityMention counterparts
    Map<String, EntityMention> entityMentionMap = Generics.newHashMap();


    /*
    for (int sentenceIndex = 0; sentenceIndex < aceDocument.getSentenceCount(); sentenceIndex++) {
      List<AceToken> tokens = aceDocument.getSentence(sentenceIndex);
      StringBuffer b = new StringBuffer();
      for(AceToken t: tokens) b.append(t.getLiteral() + " " );
      logger.info("SENTENCE: " + b.toString());
    }
    */


    int tokenOffset = 0;
    for (int sentenceIndex = 0; sentenceIndex < aceDocument.getSentenceCount(); sentenceIndex++) {
      List<AceToken> tokens = aceDocument.getSentence(sentenceIndex);


      List<CoreLabel> words = new ArrayList<CoreLabel>();
      StringBuilder textContent = new StringBuilder();
      for(int i = 0; i < tokens.size(); i ++){
        CoreLabel l = new CoreLabel();
        l.setWord(tokens.get(i).getLiteral());
        l.set(CoreAnnotations.ValueAnnotation.class, l.word());
        l.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, tokens.get(i).getByteStart());
        l.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, tokens.get(i).getByteEnd());
        words.add(l);
        if(i > 0) textContent.append(" ");
        textContent.append(tokens.get(i).getLiteral());
      }


      // skip "sentences" that are really just SGML tags (which come from using the RobustTokenizer)
      if (words.size() == 1) {
        String word = words.get(0).word();
        if (word.startsWith("<") && word.endsWith(">")) {
          tokenOffset += tokens.size();
          continue;
        }
      }


      CoreMap sentence = new Annotation(textContent.toString());
      sentence.set(CoreAnnotations.DocIDAnnotation.class, docId);
      sentence.set(CoreAnnotations.TokensAnnotation.class, words);
      logger.info("Reading sentence: \"" + textContent + "\"");


      List<AceEntityMention> entityMentions = aceDocument.getEntityMentions(sentenceIndex);
      List<AceRelationMention> relationMentions = aceDocument.getRelationMentions(sentenceIndex);
      List<AceEventMention> eventMentions = aceDocument.getEventMentions(sentenceIndex);


      // convert entity mentions
      for (AceEntityMention aceEntityMention : entityMentions) {
        String corefID="";
        for(String entityID : aceDocument.getKeySetEntities()){
          AceEntity e = aceDocument.getEntity(entityID);
          if(e.getMentions().contains(aceEntityMention)){
            corefID = entityID;
            break;
          }
        }
Examples of AceDocument

Examples of edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceDocument