Package gate

Examples of gate.Document


    FeatureMap docfeats = document.getFeatures();
    thefeats.putAll(docfeats);

    String theName = document.getName();
    // create a copy of the current document
    Document newDoc;
    try {
      newDoc = (Document) Factory.createResource(
              "gate.corpora.DocumentImpl",
              theparms,
              thefeats,
              theName+"_virtual");
    } catch (ResourceInstantiationException ex) {
      throw new GateRuntimeException(ex);
    }

    /* no forward annotation mapping yet ...
    if(annotatedDocumentTransformer.getGenerateForwardOffsetMap()) {
      annotatedDocumentTransformer.addForwardMappedAnnotations(
              document, newDoc,
              mapBackAnnotations);
    }
    */

    languageAnalyser.setDocument(newDoc);
    languageAnalyser.execute();

    if(annotatedDocumentTransformer.getGenerateBackwardOffsetMap()) {
      // figure out the annotation set names to map back
      List<String> effectiveMapFromAnnsetNames = new ArrayList<String>();
      if(mapBackAnnotations == null || mapBackAnnotations.size() == 0) {
        effectiveMapFromAnnsetNames.add("");
        Set<String> setnames = newDoc.getAnnotationSetNames();
        if(setnames != null) {
          for(String sn : setnames) {
            effectiveMapFromAnnsetNames.add(sn);
          }
        }
View Full Code Here


    FeatureMap docfeats = document.getFeatures();
    thefeats.putAll(docfeats);

    String theName = document.getName();
    // create a copy of the current document
    Document newDoc;
    try {
      newDoc = (Document) Factory.createResource(
              "gate.corpora.DocumentImpl",
              theparms,
              thefeats,
              theName+"_virtual");
    } catch (ResourceInstantiationException ex) {
      throw new GateRuntimeException(ex);
    }

    // set the initial annotations in the virtual document
    AnnotationSet newSet = newDoc.getAnnotations(virtualSpecificationSet);
    for(AnnotationSpec annspec : annspecs) {
      FeatureMap fm = Factory.newFeatureMap();
      fm.putAll(annspec.annotation.getFeatures());
      fm.put("orig_id",annspec.origId);     
      try {
        newSet.add(annspec.fromOffset, annspec.toOffset, virtualSpecificationType, fm);
      } catch(InvalidOffsetException ex) {
        throw new GateRuntimeException(
          "Invalid offset when creating annotation for virtual document: from/to/doclength: "+
          annspec.fromOffset+"/"+annspec.toOffset+"/"+newDoc.getContent().size(),ex);
      }
    }
   
    languageAnalyser.setDocument(newDoc);
    languageAnalyser.execute();

    // Go through the annotations in the generated document and map the created
    // feature back to the original document
    List<Annotation> virtanns =
    gate.Utils.inDocumentOrder(
      newDoc.getAnnotations(virtualSpecificationSet).get(virtualSpecificationType));
    // System.err.println("Processing virtual annotations: "+virtanns.size());
    for(Annotation virtann : virtanns) {
      String value = (String)virtann.getFeatures().get(virtualSpecificationFeature);
      Integer id = (Integer)virtann.getFeatures().get("orig_id");
      Annotation origann = anns.get(id);
View Full Code Here

        thefeats.put(k, docfeats.get(k));
      }

      String theName = document.getName();
      // create a copy of the current document
      Document newDoc;
      try {
        newDoc = (Document) Factory.createResource(
              //theclass,
              "gate.corpora.DocumentImpl",
              theparms,
              thefeats,
              theName+copiedDocNameSuffix
              );
      } catch (ResourceInstantiationException ex) {
        throw new GateRuntimeException(ex);
      }
      if(annotatedDocumentTransformer != null) {
        if(forwardcopy) {
          annotatedDocumentTransformer.
            addForwardMappedAnnotations(document, newDoc, annotationSetNames);
        }
      } else {
        // TODO: which annotation sets to copy to the copied doc here?
        // TODO: at least copy the ones specified!
      }

      if(directoryFile != null) {
        String out = "";
        if(getSavePreservingFormat()) {
          AnnotationSet as = newDoc.getAnnotations(annotationSetNames.get(0));
          out = newDoc.toXml(as,addFeaturesToPreservingFormat);
        } else {
          out = newDoc.toXml();
        }
        File outFile = new File(directoryFile, theName+copiedDocNameSuffix+".xml");
        PrintStream outStream;
        try {
          outStream = new PrintStream(outFile);
View Full Code Here

            "com.jpetrak.gate.stringannotation.extendedgazetteer2.ExtendedGazetteer2", parms);
    // load the document
    parms = Factory.newFeatureMap();
    File docFile = new File(testingDir,"extgaz2docprep.xml");
    parms.put("sourceUrl",docFile.toURI().toURL());
    Document doc = (Document)
         Factory.createResource("gate.corpora.DocumentImpl", parms);
    AnnotationSet lookups = doc.getAnnotations().get("OutType");
    assertEquals(0,lookups.size());
    // run the gazetteer on the document
    eg.setDocument(doc);
    eg.execute();
    // check if we got the correct annotations
    AnnotationSet tokens = doc.getAnnotations().get("Token");
    assertEquals(46,tokens.size());
    AnnotationSet sentences = doc.getAnnotations().get("Sentence");
    assertEquals(4,sentences.size());
    lookups = doc.getAnnotations().get("OutType");
    if(backendNr == 3) {
      assertEquals(12,lookups.size());
    } else {
      assertEquals(14,lookups.size());
    }
    int i = 1;
    FeatureMap fm;
    long from;
    long to;
    for(Annotation ann : gate.Utils.inDocumentOrder(lookups)) {
      //System.out.println("Annotation: "+ann);
      fm = ann.getFeatures();
      String inst = (String)fm.get("inst");
      String string = (String)fm.get("_string");
      from = ann.getStartNode().getOffset();
      to = ann.getEndNode().getOffset();
      if(i == 1) {
        assertEquals(8,from);
        assertEquals(12,to);
        assertEquals("i1",inst);
        assertEquals("some",string);
      } else if(i == 5) {
        assertEquals(26,from);
        assertEquals(34,to);
        assertEquals("i11",inst);
        assertEquals("word and",string);
      }
      i++;
    }
    doc.getAnnotations().removeAll(lookups);
    eg.setMatchAtWordStartOnly(false);
    eg.setMatchAtWordEndOnly(false);
    eg.execute();
    lookups = doc.getAnnotations().get("OutType");
    assertEquals(22,lookups.size());
    doc.getAnnotations().removeAll(lookups);
    eg.setLongestMatchOnly(false);
    eg.execute();
    lookups = doc.getAnnotations().get("OutType");
    assertEquals(26,lookups.size());
    System.out.println("Gazetteer application test 1 finished for backedn "+backendNr);
  }
View Full Code Here

    // load the document
    eg.setOutputAnnotationSet("EXT");
    parms = Factory.newFeatureMap();
    File docFile = new File(testingDir,"news1pre.xml");
    parms.put("sourceUrl",docFile.toURI().toURL());
    Document doc = (Document)
         Factory.createResource("gate.corpora.DocumentImpl", parms);
    AnnotationSet lookups = doc.getAnnotations("EXT").get("Lookup");
    assertEquals(0,lookups.size());
    // run the gazetteer on the document
    eg.setDocument(doc);
    eg.execute();
    AnnotationDiffer differ = new AnnotationDiffer();
    differ.setSignificantFeaturesSet(new HashSet<String>());
    AnnotationSet keys = doc.getAnnotations().get("Lookup");
    System.out.println("Lookups old: "+keys.size());
    AnnotationSet responses = doc.getAnnotations("EXT").get("Lookup");
    System.out.println("Lookups new: "+responses.size());
    differ.calculateDiff(keys, responses);
    int correct = differ.getCorrectMatches();
    int falsePositives = differ.getFalsePositivesStrict();
    int missing = differ.getMissing();
    System.out.println("Diff: correct="+correct+" false positives="+falsePositives+" missing="+missing);
    File outFile = new File(testingDir,"news1pre_procBE"+backendNr+".xml");
    FileUtils.writeStringToFile(outFile, doc.toXml(),"UTF-8");
    // the 33 false Positives come from duplicates that are introduced from several list files
    // and which are not removed by the ExtGaz
    // the 2 missing come from matches within hyphenated words (after a hyphen) which are
    // not found by ExtGaz because they occur inside a single Token (hyphens do not split up tokens)
    assertEquals(194,correct);
View Full Code Here

   */
  @Override
  public LanguageResource adopt(LanguageResource langres) throws PersistenceException {
    LanguageResource lr = langres;
    if(lr instanceof Document) {
      Document doc = (Document)lr;
      if(doc.getDataStore() == null || doc.getDataStore() != this) {
        throw new PersistenceException("Cannot adopt document, already in a different datastore: "+lr.getName());
      }
      // otherwise, the document is already adopted by this datastore so we
      // silently ignore this.
    } else if(lr instanceof CorpusImpl) {
View Full Code Here

   */
  @Override
  public LanguageResource adopt(LanguageResource langres) throws PersistenceException {
    LanguageResource lr = langres;
    if(lr instanceof Document) {
      Document doc = (Document)lr;
      if(doc.getDataStore() == null || doc.getDataStore() != this) {
        throw new PersistenceException("Cannot adopt document, already in a different datastore: "+lr.getName());
      }
      // otherwise, the document is already adopted by this datastore so we
      // silently ignore this.
    } else if(lr instanceof CorpusImpl) {
View Full Code Here

TOP

Related Classes of gate.Document

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.