Package org.apache.uima.examples

Examples of org.apache.uima.examples.SourceDocumentInformation


    FSIterator it = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    if (!it.hasNext()) {
      throw new AnalysisEngineProcessException(MESSAGE_DIGEST, MISSING_SOURCE_DOCUMENT_INFO,
              new Object[0]);
    }
    SourceDocumentInformation sourceDocInfo = (SourceDocumentInformation) it.next();
    if (sourceDocInfo.getLastSegment()) {
      // time to produce an output CAS
      // set the document text
      mMergedCas.setDocumentText(mDocBuf.toString());

      // add source document info to destination CAS
      SourceDocumentInformation destSDI = new SourceDocumentInformation(mMergedCas);
      destSDI.setUri(sourceDocInfo.getUri());
      destSDI.setOffsetInSource(0);
      destSDI.setLastSegment(true);
      destSDI.addToIndexes();

      mDocBuf = new StringBuffer();
      mReadyToOutput = true;
    }
  }
View Full Code Here


    mPos = 0;
    // retrieve the filename of the input file from the CAS so that it can be added
    // to each segment
    FSIterator it = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      mDocUri = fileLoc.getUri();
    } else {
      mDocUri = null;
    }
  }
View Full Code Here

    try {
      jcas.setDocumentText(mDoc.substring(mPos, breakAt));
      // if original CAS had SourceDocumentInformation, also add SourceDocumentInformation
      // to each segment
      if (mDocUri != null) {
        SourceDocumentInformation sdi = new SourceDocumentInformation(jcas);
        sdi.setUri(mDocUri);
        sdi.setOffsetInSource(mPos);
        sdi.setDocumentSize(breakAt - mPos);
        sdi.addToIndexes();

        if (breakAt == mDoc.length()) {
          sdi.setLastSegment(true);
        }
      }

      mPos = breakAt;
      return jcas;
View Full Code Here

    // Also store location of source document in CAS. This information is critical
    // if CAS Consumers will need to know where the original document contents are located.
    // For example, the Semantic Search CAS Indexer writes this information into the
    // search index that it creates, which allows applications that use the search index to
    // locate the documents that satisfy their semantic queries.
    SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
    srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
    srcDocInfo.setOffsetInSource(0);
    srcDocInfo.setDocumentSize((int) file.length());
    srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
    srcDocInfo.addToIndexes();
  }
View Full Code Here

        // Assume the variable "jcas" holds a reference to a JCas
        con.setAutoCommit(false); // need this for batch updating
      }

      // get the singleton instance of the SourceDocumentInformation
      SourceDocumentInformation sdi = (SourceDocumentInformation)
              jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator().next();

      System.out.println("Time: " + (System.currentTimeMillis() - startTime)
              + " DB Writer: Processing doc: '" + sdi.getUri() + "'");

      stmt.setString(1, truncate(sdi.getUri(), MAX_URI_LENGTH));
      for (FSIterator iter = jcas.getAnnotationIndex(PersonTitle.type).iterator();
           iter.hasNext();) {
        PersonTitle pt = (PersonTitle) iter.next();
        stmt.setString(2, truncate(pt.getCoveredText(), MAX_TITLE_LENGTH));
        stmt.setInt(3, pt.getBegin());
View Full Code Here

    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += fileLoc.getOffsetInSource();
        }
        outFile = new File(mOutputDir, outFileName);
      } catch (MalformedURLException e1) {
        // invalid URL, use default processing below
      }
View Full Code Here

    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        outFile = new File(mOutputDir, inFile.getName());
      } catch (MalformedURLException e1) {
        // invalid URL, use default processing below
      }
    }
View Full Code Here

    boolean titleP = false;
    String docUri = null;
    Iterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    if (it.hasNext()) {
      SourceDocumentInformation srcDocInfo = (SourceDocumentInformation) it.next();
      docUri = srcDocInfo.getUri();
    }

    // iterate and print annotations
    Iterator annotationIter = jcas.getAnnotationIndex().iterator();
    while (annotationIter.hasNext()) {
View Full Code Here

        String doc_id = null;
        try {
            Iterator<? extends Annotation> it;
            it = UIMAAnnotationUtils.iterator(jcas, SourceDocumentInformation.class);
            while (it.hasNext() && (null == doc_id)) {
                SourceDocumentInformation sdi = (SourceDocumentInformation)it.next();
                String[] toks = sdi.getUri().split("\\/");
                doc_id = toks[toks.length - 1];
            }
        }
        catch (Exception e) {
            e.printStackTrace();
View Full Code Here

    File docFile = JUnitExtension.getFile("testData.txt");
    String document = FileUtils.file2String(docFile);
    cas.setDocumentText(document);
    cas.setDocumentLanguage("en");
   
    SourceDocumentInformation sdi_ann = new SourceDocumentInformation(cas.getJCas(), 0, document.length());
    sdi_ann.setUri(docFile.toURI().toString());
    sdi_ann.addToIndexes();
    ae.process(cas);
   
    File outFile = new File("tempTestOut/testData.txt.fve");
    File outFileRef = JUnitExtension.getFile("testDataRef.txt.fve");
   
View Full Code Here

TOP

Related Classes of org.apache.uima.examples.SourceDocumentInformation

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.