Package org.apache.uima.examples

Examples of org.apache.uima.examples.SourceDocumentInformation


    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += ("_" + fileLoc.getOffsetInSource());
        }
        outFileName += ".xmi";
        outFile = new File(mOutputDir, outFileName);
        modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
      } catch (MalformedURLException e1) {
View Full Code Here


      // Also store location of source document in CAS. This information is critical
      // if CAS Consumers will need to know where the original document contents are located.
      // For example, the Semantic Search CAS Indexer writes this information into the
      // search index that it creates, which allows applications that use the search index to
      // locate the documents that satisfy their semantic queries.
      SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
      srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
      srcDocInfo.setOffsetInSource(0);
      srcDocInfo.setDocumentSize((int) file.length());
      srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
      srcDocInfo.addToIndexes();
    }
    // XCAS input files
    else {
      try {
        if (mXCAS.equalsIgnoreCase("xmi")) {
View Full Code Here

    mPos = 0;
    // retrieve the filename of the input file from the CAS so that it can be added
    // to each segment
    FSIterator it = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      mDocUri = fileLoc.getUri();
    } else {
      mDocUri = null;
    }
  }
View Full Code Here

    try {
      jcas.setDocumentText(mDoc.substring(mPos, breakAt));
      // if original CAS had SourceDocumentInformation, also add SourceDocumentInformation
      // to each segment
      if (mDocUri != null) {
        SourceDocumentInformation sdi = new SourceDocumentInformation(jcas);
        sdi.setUri(mDocUri);
        sdi.setOffsetInSource(mPos);
        sdi.setDocumentSize(breakAt - mPos);
        sdi.addToIndexes();

        if (breakAt == mDoc.length()) {
          sdi.setLastSegment(true);
        }
      }

      mPos = breakAt;
      return jcas;
View Full Code Here

    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        outFile = new File(mOutputDir, inFile.getName());
      } catch (MalformedURLException e1) {
        // invalid URL, use default processing below
      }
    }
View Full Code Here

      mBuf.append(mDoc.substring(mCurIndex));
      if (mJCases[mActiveJCas] == null) {
        mJCases[mActiveJCas] = getEmptyJCas();
      }
      // add SourceDocumentInformation to active JCas
      SourceDocumentInformation sdi = new SourceDocumentInformation(mJCases[mActiveJCas]);
      sdi.setBegin(begin);
      sdi.setEnd(mBuf.length());
      sdi.setUri(getCasSourceUri(mCurrentInputCas));
      sdi.addToIndexes();
      mHasNext = false; // we need to see another input CAS before we can create output
      mCurIndex = 0;
    } else // yes, newline
    {
      // append doc up to newline
      int begin = mBuf.length(); // record start offset of new text
      mBuf.append(mDoc.substring(mCurIndex, nlIndex));
      if (mJCases[mActiveJCas] == null) {
        mJCases[mActiveJCas] = getEmptyJCas();
      }
      // add SourceDocumentInformation to active JCas
      SourceDocumentInformation sdi = new SourceDocumentInformation(mJCases[mActiveJCas]);
      sdi.setBegin(begin);
      sdi.setEnd(mBuf.length());
      sdi.setUri(getCasSourceUri(mCurrentInputCas));
      sdi.addToIndexes();
      // set doc text
      mJCases[mActiveJCas].setDocumentText(mBuf.toString());
      mBuf.setLength(0);
      mCurIndex = nlIndex + 1;
      mHasNext = true; // ready to output!
View Full Code Here

  private String getCasSourceUri(JCas jcas) {
    Iterator<Annotation> iter = jcas.getJFSIndexRepository().getAnnotationIndex(SourceDocumentInformation.type)
            .iterator();
    if (iter.hasNext()) {
      SourceDocumentInformation sdi = (SourceDocumentInformation) iter.next();
      return sdi.getUri();
    } else {
      return "unknown";
    }
  }
View Full Code Here

    // Also store location of source document in CAS. This information is critical
    // if CAS Consumers will need to know where the original document contents are located.
    // For example, the Semantic Search CAS Indexer writes this information into the
    // search index that it creates, which allows applications that use the search index to
    // locate the documents that satisfy their semantic queries.
    SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
    srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
    srcDocInfo.setOffsetInSource(0);
    srcDocInfo.setDocumentSize((int) file.length());
    srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
    srcDocInfo.addToIndexes();
  }
View Full Code Here

    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        // handle blanks in path
        // https://issues.apache.org/jira/browse/UIMA-1748
        // use 3 arg form of URI Constructor to properly quote any otherwise illegal chars such as blank
        // https://issues.apache.org/jira/browse/UIMA-2097
        URI uri = UriUtils.quote(fileLoc.getUri());
        inFile = new File(uri);
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += ("_" + fileLoc.getOffsetInSource());
        }
        outFileName += ".xmi";
        outFile = new File(mOutputDir, outFileName);
        modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
      } catch (URISyntaxException e) {
View Full Code Here

      mBuf.append(mDoc.substring(mCurIndex));
      if (mJCases[mActiveJCas] == null) {
        mJCases[mActiveJCas] = getEmptyJCas();
      }
      // add SourceDocumentInformation to active JCas
      SourceDocumentInformation sdi = new SourceDocumentInformation(mJCases[mActiveJCas]);
      sdi.setBegin(begin);
      sdi.setEnd(mBuf.length());
      sdi.setUri(getCasSourceUri(mCurrentInputCas));
      sdi.addToIndexes();
      mHasNext = false; // we need to see another input CAS before we can create output
      mCurIndex = 0;
    } else // yes, newline
    {
      // append doc up to newline
      int begin = mBuf.length(); // record start offset of new text
      mBuf.append(mDoc.substring(mCurIndex, nlIndex));
      if (mJCases[mActiveJCas] == null) {
        mJCases[mActiveJCas] = getEmptyJCas();
      }
      // add SourceDocumentInformation to active JCas
      SourceDocumentInformation sdi = new SourceDocumentInformation(mJCases[mActiveJCas]);
      sdi.setBegin(begin);
      sdi.setEnd(mBuf.length());
      sdi.setUri(getCasSourceUri(mCurrentInputCas));
      sdi.addToIndexes();
      // set doc text
      mJCases[mActiveJCas].setDocumentText(mBuf.toString());
      mBuf.setLength(0);
      mCurIndex = nlIndex + 1;
      mHasNext = true; // ready to output!
View Full Code Here

TOP

Related Classes of org.apache.uima.examples.SourceDocumentInformation

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.