Package at.ofai.gate.virtualdocuments

Source Code of at.ofai.gate.virtualdocuments.CopyVirtualDocumentPR

/*
*  CopyVirtualDocumentPR.java
*
*  This file is free software, licenced under the
*  GNU Library General Public License, Version 2, June 1991.
*  See http://www.gnu.org/licenses/gpl-2.0.html
*
*  $Id: $
*/

package at.ofai.gate.virtualdocuments;

import gate.AnnotationSet;
import gate.Controller;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.LanguageAnalyser;
import gate.corpora.DocumentImpl;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ControllerAwarePR;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.GateRuntimeException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import javax.naming.InvalidNameException;


/**
* This creates a virtual document and copies it to the output corpus.
* @author johann
*/
@CreoleResource(name = "Copy Virtual Document PR",
        comment = "Create a virtual document and copy to another corpus")
public class CopyVirtualDocumentPR
  extends AbstractLanguageAnalyser
  implements LanguageAnalyser, ControllerAwarePR
{
  public static final long serialVersionUID = 1L;
 
  @RunTime
  @CreoleParameter(comment = "A list of source annotation specifications")
  public void setOutputCorpus(Corpus outputCorpus) {
    this.outputCorpus = outputCorpus;
  }
  public Corpus getOutputCorpus() {
    return outputCorpus;
  }
  Corpus outputCorpus;

  @RunTime
  @Optional
  @CreoleParameter(
    comment = "URL of the directory where to write the document files to"
    )
  public void setDirectoryUrl(URL directoryUrl) {
    this.directoryUrl = directoryUrl;
  }
  public URL getDirectoryUrl() {
    return directoryUrl;
  }
  URL directoryUrl;

  File directoryFile;



  @RunTime
  @CreoleParameter(comment = "A list of source annotation specifications (document is copied as is if not given)")
  public void setSourceSpecifications(List<String> ss) {
    this.sourceSpecifications = ss;
  }
  public List<String> getSourceSpecifications() {
    return sourceSpecifications;
  }
  private List<String> sourceSpecifications;

  @RunTime
  @CreoleParameter(comment = "A list or processing options",
    defaultValue="separator=;takeAll=false;takeOverlapping=false;separatorSame=;separatorKeyValue=;terminator="
    )
  public void setProcessingOptions(FeatureMap po) {
    this.processingOptions = po;
  }
  public FeatureMap getProcessingOptions() {
    return processingOptions;
  }
  private FeatureMap processingOptions;

  @RunTime
  @CreoleParameter(comment = "A list of Annotation Sets/Types to map to the new document; use set[.type] format")
  public void setAnnotationSetNames(List<String> as) {
    this.annotationSetNames = as;
  }
  public List<String> getAnnotationSetNames() {
    return annotationSetNames;
  }
  private List<String> annotationSetNames;

  @RunTime
  @CreoleParameter(comment = "The input annotation set",
  defaultValue = "")
  public void setInputAnnotationSetName(String ias) {
    this.inputAnnotationSetName = ias;
  }
  public String getInputAnnotationSetName() {
    return inputAnnotationSetName;
  }
  private String inputAnnotationSetName = "";


  @RunTime
  @CreoleParameter(comment = "Preserve format from 'Original Markups' or the only set given for annotation names  when saving to a file", defaultValue = "false")
  public void setSavePreservingFormat(Boolean yesno) {
    savePreservingFormat = yesno;
  }
  public Boolean getSavePreservingFormat() {
    return savePreservingFormat;
  }
  private Boolean savePreservingFormat;


  @RunTime
  @CreoleParameter(comment = "add features when preserving format", defaultValue = "false")
  public void setAddFeaturesToPreservingFormat(Boolean yesno) {
    addFeaturesToPreservingFormat = yesno;
  }
  public Boolean getAddFeaturesToPreservingFormat() {
    return addFeaturesToPreservingFormat;
  }
  private Boolean addFeaturesToPreservingFormat;

  @RunTime
  @CreoleParameter(comment = "Suffix to add to name of copied document",
  defaultValue = "")
  public void setCopiedDocNameSuffix(String suffix) {
    copiedDocNameSuffix = suffix;
  }
  public String getCopiedDocNameSuffix() {
    return copiedDocNameSuffix;
  }
  private String copiedDocNameSuffix;

  private boolean forwardcopy = false;

  AnnotatedDocumentTransformer annotatedDocumentTransformer;

  @Override
  public void execute() {
    fireStatusChanged("CopyVirtualDocumentPR processing: "
            + getDocument().getName());


    String newText = "";
    if(annotatedDocumentTransformer != null) {
      newText = annotatedDocumentTransformer.getStringForDocument(
            getDocument(), inputAnnotationSetName);
    } else {
      newText = getDocument().getContent().toString();
    }
     

      if (!(document instanceof DocumentImpl)) {
        throw new GateRuntimeException("Can only handle DocumentImpl not " + document.getClass());
      }
      String theclass = document.getClass().toString();
      FeatureMap theparms = Factory.newFeatureMap();
      theparms.put("collectRepositioningInfo", document.getCollectRepositioningInfo());
      theparms.put("encoding", ((DocumentImpl) document).getEncoding());
      theparms.put("markupAware", document.getMarkupAware());
      theparms.put("mimeType", ((DocumentImpl)document).getMimeType());
      theparms.put("preserveOriginalContent", document.getPreserveOriginalContent());
      theparms.put("stringContent", newText);
      FeatureMap thefeats = Factory.newFeatureMap();
      FeatureMap docfeats = document.getFeatures();
      for(Object k : docfeats.keySet()) {
        thefeats.put(k, docfeats.get(k));
      }

      String theName = document.getName();
      // create a copy of the current document
      Document newDoc;
      try {
        newDoc = (Document) Factory.createResource(
              //theclass,
              "gate.corpora.DocumentImpl",
              theparms,
              thefeats,
              theName+copiedDocNameSuffix
              );
      } catch (ResourceInstantiationException ex) {
        throw new GateRuntimeException(ex);
      }
      if(annotatedDocumentTransformer != null) {
        if(forwardcopy) {
          annotatedDocumentTransformer.
            addForwardMappedAnnotations(document, newDoc, annotationSetNames);
        }
      } else {
        // TODO: which annotation sets to copy to the copied doc here?
        // TODO: at least copy the ones specified!
      }

      if(directoryFile != null) {
        String out = "";
        if(getSavePreservingFormat()) {
          AnnotationSet as = newDoc.getAnnotations(annotationSetNames.get(0));
          out = newDoc.toXml(as,addFeaturesToPreservingFormat);
        } else {
          out = newDoc.toXml();
        }
        File outFile = new File(directoryFile, theName+copiedDocNameSuffix+".xml");
        PrintStream outStream;
        try {
          outStream = new PrintStream(outFile);
        } catch (FileNotFoundException ex) {
          throw new GateRuntimeException("Cannot write to file "+outFile.getAbsoluteFile(),ex);
        }
        outStream.print(out);
        outStream.close();
      }

      // we keep the document if it is added to a transient corpus, otherwise
      // we drop it.
      boolean keepDoc = false;

      if(outputCorpus != null) {
        outputCorpus.add(newDoc);
        if(outputCorpus.getLRPersistenceId() == null) {
          keepDoc = true;
        } else {
          outputCorpus.unloadDocument(newDoc);
        }
      }
      if(!keepDoc) {
        Factory.deleteResource(newDoc);
      }


    fireStatusChanged("CopyVirtualDocumentPR completed");

  }
 
  @Override
  public void controllerExecutionAborted(Controller arg0, Throwable arg1)
      throws ExecutionException {
  }

  @Override
  public void controllerExecutionFinished(Controller arg0)
      throws ExecutionException {
  }

  @Override
  public void controllerExecutionStarted(Controller arg0)
      throws ExecutionException {
    startup();
  }

  public void startup() throws ExecutionException {
    forwardcopy = false;
    if(annotationSetNames != null && annotationSetNames.size() > 0) {
      forwardcopy = true;
    }
    if(getSavePreservingFormat()) {
      if(annotationSetNames.size() != 1) {
        throw new GateRuntimeException("Need exactly one set for annotation sets if save preserving format is true");
      }
      String annsetname = annotationSetNames.get(0);
      if(annsetname.contains(".")) {
        throw new GateRuntimeException("Annotation set must not contain a type (have a dot) if save preserving format is true");
      }
    }
    if(directoryUrl != null) {
      try {
        directoryFile = new File(directoryUrl.toURI());
      } catch (URISyntaxException ex) {
        throw new GateRuntimeException(ex);
      }
    } else {
      directoryFile = null;
    }
    if(directoryUrl == null && outputCorpus == null) {
      throw new GateRuntimeException("Output corpus and directory URL may not be both missing");
    }
    try {
      annotatedDocumentTransformer = null;
      if(getSourceSpecifications() != null && getSourceSpecifications().size() > 0) {
        annotatedDocumentTransformer =
          new AnnotatedDocumentTransformer(
          getSourceSpecifications(), getProcessingOptions(),
          forwardcopy, false);
      }
    } catch (InvalidNameException ex) {
      throw new ExecutionException(ex);
    }
  }
 
}
TOP

Related Classes of at.ofai.gate.virtualdocuments.CopyVirtualDocumentPR

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.