Package org.apache.uima.cas.test

Source Code of org.apache.uima.cas.test.SerializationReinitTest

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.cas.test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import junit.framework.TestCase;

import org.apache.uima.UIMAFramework;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.FSIndexRepository;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Marker;
import org.apache.uima.cas.ShortArrayFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.admin.CASAdminException;
import org.apache.uima.cas.admin.CASFactory;
import org.apache.uima.cas.admin.CASMgr;
import org.apache.uima.cas.admin.TypeSystemMgr;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.cas.impl.CASSerializer;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.impl.Serialization;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas_data.impl.CasComparer;
import org.apache.uima.internal.util.TextStringTokenizer;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.TypePriorities_impl;
import org.apache.uima.test.junit_extension.JUnitExtension;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;

/**
* Class comment for TokenizerTest.java goes here.
*
*/
public class SerializationReinitTest extends TestCase {

  public static final String TOKEN_TYPE = "Token";

  public static final String TOKEN_TYPE_FEAT = "type";

  public static final String TOKEN_TYPE_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
          + TOKEN_TYPE_FEAT;

  public static final String TOKEN_TYPE_TYPE = "TokenType";

  public static final String WORD_TYPE = "Word";

  public static final String SEP_TYPE = "Separator";

  public static final String EOS_TYPE = "EndOfSentence";

  public static final String SENT_TYPE = "Sentence";

  public static final String STRING_SUBTYPE_1 = "StringSubtype1";

  public static final String[] STR_1_VALS = { "test1", "test2" };

  public static final String OSTR_TYPE = "theType";
 
  public static final String OSTR_TYPE_FEAT = "theString";
 
  public static final String OBYTE_TYPE_FEAT = "theByte";
 
  public static final String OSHORT_TYPE_FEAT = "theShort";
 
  public static final String OBYTEA_TYPE_FEAT = "theByteArray";
 
  public static final String OSHORTA_TYPE_FEAT = "theShortArray";
 
  public static final String OLONG_TYPE_FEAT = "theLong";

  private CASMgr casMgr;

  private CAS cas;

  private Type wordType;

  private Type separatorType;

  private Type eosType;

  private Type tokenType;

  private Feature tokenTypeFeature;

  private Type sentenceType;

  private Feature startFeature;

  private Feature endFeature;

  private Type strSub1;

  private Type theTypeType;
 
  private Feature theStringFeature;
 
  private Feature theByteFeature;
 
  private Feature theShortFeature;
 
  private Feature theByteArrayFeature;
 
  private Feature theShortArrayFeature;
 
  private Feature theLongFeature;
 
  private FsIndexDescription[] indexes;

  private TypeSystemDescription typeSystem;


  public SerializationReinitTest(String arg) {
    super(arg);
  }

  /**
   * @see junit.framework.TestCase#setUp()
   */
  public void setUp() throws Exception {
    super.setUp();
    casMgr = initCAS();
    cas = (CASImpl)casMgr;

    TypeSystem ts = cas.getTypeSystem();
    wordType = ts.getType(WORD_TYPE);
    // assert(wordType != null);
    separatorType = ts.getType(SEP_TYPE);
    eosType = ts.getType(EOS_TYPE);
    tokenType = ts.getType(TOKEN_TYPE);
    tokenTypeFeature = ts.getFeatureByFullName(TOKEN_TYPE_FEAT_Q);
    startFeature = ts.getFeatureByFullName(CAS.FEATURE_FULL_NAME_BEGIN);
    endFeature = ts.getFeatureByFullName(CAS.FEATURE_FULL_NAME_END);
    sentenceType = ts.getType(SENT_TYPE);
    strSub1 = ts.getType(STRING_SUBTYPE_1);
    assertTrue(strSub1 != null);
    theTypeType = ts.getType(OSTR_TYPE);
    theStringFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSTR_TYPE_FEAT);
    theByteFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OBYTE_TYPE_FEAT);
    theByteArrayFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OBYTEA_TYPE_FEAT);
    theShortFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSHORT_TYPE_FEAT);
    theShortArrayFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSHORTA_TYPE_FEAT);
    theLongFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OLONG_TYPE_FEAT);
 
    File typeSystemFile = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
    File indexesFile = JUnitExtension.getFile("ExampleCas/testIndexes.xml");

    typeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
                new XMLInputSource(typeSystemFile));
    indexes = UIMAFramework.getXMLParser().parseFsIndexCollection(new XMLInputSource(indexesFile))
                .getFsIndexes();
  }

  public void tearDown() {
    casMgr = null;
    cas = null;
    wordType = null;
    // assert(wordType != null);
    separatorType = null;
    eosType = null;
    tokenType = null;
    tokenTypeFeature = null;
    startFeature = null;
    endFeature = null;
    sentenceType = null;
    strSub1 = null;
    indexes = null;
    typeSystem = null;
  }

  // Initialize the first CAS.
  private static CASMgr initCAS() throws CASException {
    // Create an initial CASMgr from the factory.
    // CASMgr cas = CASFactory.createCAS();
    // assert(tsa != null);
    // Create a CASMgr. Ensures existence of AnnotationFS type.
    // CASMgr tcas = CASFactory.createCAS();
    CASMgr aCas = CASFactory.createCAS();
    try {
      CasCreationUtils.setupTypeSystem(aCas, (TypeSystemDescription) null);
    } catch (ResourceInitializationException e) {
      e.printStackTrace();
    }
    // Create a writable type system.
    TypeSystemMgr tsa = aCas.getTypeSystemMgr();
    // Add new types and features.
    Type topType = tsa.getTopType();
    Type annotType = tsa.getType(CAS.TYPE_NAME_ANNOTATION);
    // assert(annotType != null);
    tsa.addType(SENT_TYPE, annotType);
    Type tokenType = tsa.addType(TOKEN_TYPE, annotType);
    Type tokenTypeType = tsa.addType(TOKEN_TYPE_TYPE, topType);
    tsa.addType(WORD_TYPE, tokenTypeType);
    tsa.addType(SEP_TYPE, tokenTypeType);
    tsa.addType(EOS_TYPE, tokenTypeType);
    tsa.addFeature(TOKEN_TYPE_FEAT, tokenType, tokenTypeType);
    tsa.addStringSubtype(STRING_SUBTYPE_1, STR_1_VALS);
    Type stringType = tsa.getType(CAS.TYPE_NAME_STRING);
    Type byteType = tsa.getType(CAS.TYPE_NAME_BYTE);
    Type byteArrayType = tsa.getType(CAS.TYPE_NAME_BYTE_ARRAY);
    Type shortType = tsa.getType(CAS.TYPE_NAME_SHORT);
    Type shortArrayType = tsa.getType(CAS.TYPE_NAME_SHORT_ARRAY);
    Type longType = tsa.getType(CAS.TYPE_NAME_LONG);
    Type theTypeType = tsa.addType(OSTR_TYPE, annotType);
    tsa.addFeature(OSTR_TYPE_FEAT, theTypeType, stringType);
    tsa.addFeature(OBYTE_TYPE_FEAT, theTypeType, byteType);
    tsa.addFeature(OSHORT_TYPE_FEAT, theTypeType, shortType);
    tsa.addFeature(OBYTEA_TYPE_FEAT, theTypeType, byteArrayType);
    tsa.addFeature(OSHORTA_TYPE_FEAT, theTypeType, shortArrayType);
    tsa.addFeature(OLONG_TYPE_FEAT, theTypeType, longType);
    // Commit the type system.
    ((CASImpl) aCas).commitTypeSystem();
    // assert(tsa.isCommitted());
    // // Create the CAS indexes.
    // tcas.initCASIndexes();
    // Create the Base indexes.
    try {
      aCas.initCASIndexes();
    } catch (CASException e) {
      e.printStackTrace();
    }

    // Commit the index repository.
    aCas.getIndexRepositoryMgr().commit();
    // assert(cas.getIndexRepositoryMgr().isCommitted());

    // Create the default text Sofa and return CAS view
    return (CASMgr) aCas.getCAS().getCurrentView();
  }

  public void testReset() {
    cas.reset();
    casMgr.enableReset(false);
    boolean exc = false;
    try {
      cas.reset();
    } catch (CASAdminException e) {
      assertTrue(e.getError() == CASAdminException.FLUSH_DISABLED);
      exc = true;
    }
    assertTrue(exc);
    casMgr.enableReset(true);
    cas.reset();
  }

  // Tokenize text.
  private void tokenize() throws Exception {
    // System.out.println("Tokenizing text.");

    // Create FSs for the token types.
    FeatureStructure wordFS = cas.createFS(wordType);
    FeatureStructure sepFS = cas.createFS(separatorType);
    FeatureStructure eosFS = cas.createFS(eosType);

    String text = cas.getDocumentText();
    TextStringTokenizer tokenizer = new TextStringTokenizer(text);
    tokenizer.setSeparators("/-*&@");
    tokenizer.addWhitespaceChars(",");
    tokenizer.setEndOfSentenceChars(".!?");
    tokenizer.setShowWhitespace(false);
    int tokenTypeCode;
    int wordCounter = 0;
    int sepCounter = 0;
    int endOfSentenceCounter = 0;
    AnnotationFS tokenAnnot;
    while (tokenizer.isValid()) {
      tokenAnnot = cas.createAnnotation(tokenType, tokenizer.getTokenStart(), tokenizer
              .getTokenEnd());
      tokenTypeCode = tokenizer.getTokenType();
      switch (tokenTypeCode) {
        case TextStringTokenizer.EOS: {
          ++endOfSentenceCounter;
          tokenAnnot.setFeatureValue(tokenTypeFeature, eosFS);
          break;
        }
        case TextStringTokenizer.SEP: {
          ++sepCounter;
          tokenAnnot.setFeatureValue(tokenTypeFeature, sepFS);
          break;
        }
        case TextStringTokenizer.WSP: {
          break;
        }
        case TextStringTokenizer.WCH: {
          ++wordCounter;
          tokenAnnot.setFeatureValue(tokenTypeFeature, wordFS);
          // if ((wordCounter % 100000) == 0) {
          // System.out.println("Number of words tokenized: " + wordCounter);
          // }
          break;
        }
        default: {
          throw new Exception("Something went wrong, fire up that debugger!");
        }
      }
      cas.getIndexRepository().addFS(tokenAnnot);
      tokenizer.setToNext();
      // System.out.println("Token: " + tokenizer.nextToken());
    }
    // time = System.currentTimeMillis() - time;
    // System.out.println("Number of words: " + wordCounter);
    // int allTokens = wordCounter + sepCounter + endOfSentenceCounter;
    // System.out.println("Number of tokens: " + allTokens);
    // System.out.println("Time used: " + new TimeSpan(time));

    // FSIterator it = cas.getAnnotationIndex(tokenType).iterator();
    // int count = 0;
    // while (it.isValid()) {
    // ++count;
    // it.moveToNext();
    // }
    // System.out.println("Number of tokens in index: " + count);
  }

  // Very (!) primitive EOS detection.
  private void createSentences() throws CASException {
    // TypeSystem ts = cas.getTypeSystem();
    // Type eosType = ts.getType(EOS_TYPE);
    // Type tokenType = ts.getType(TOKEN_TYPE);
    // //assert(tokenType != null);
    // Type sentenceType = ts.getType(SENT_TYPE);
    // Feature tokenTypeFeature = ts.getFeature(TOKEN_TYPE_FEAT);
    // Feature startFeature = ts.getFeature(CAS.START_FEAT);
    // Feature endFeature = ts.getFeature(CAS.END_FEAT);

    // System.out.println("\nCreating sentence annotations.");

    // Get a handle to the index repository.
    FSIndexRepository indexRepository = cas.getIndexRepository();
    // assert(indexRepository != null);
    Iterator<String> labelIt = indexRepository.getLabels();
    assertTrue(labelIt != null);
    // Get the standard index for tokens.
    FSIndex<AnnotationFS> tokenIndex = cas.getAnnotationIndex(tokenType);
    // assert(tokenIndex != null);
    // Get an iterator over tokens.
    FSIterator<AnnotationFS> it = tokenIndex.iterator();
    // assert(it != null);
    // Now create sentences. We do this as follows: a sentence starts where
    // the first token after an EOS starts, and ends with an EOS.
    long time = System.currentTimeMillis();
    int endOfSentenceCounter = 0;
    it.moveToFirst();
    boolean lookForStart = true;
    int start = 0, end; // Initialize start to pacify compiler.
    FeatureStructure tokenFS, sentFS;
    while (it.isValid()) {
      if (lookForStart) {
        // If we're looking for the start of a sentence, just grab the start
        // of the current FS.
        start = it.get().getIntValue(startFeature);
        lookForStart = false;
      } else {
        // Check if we've reached the end of a sentence.
        tokenFS = it.get();
        if (tokenFS.getFeatureValue(tokenTypeFeature).getType() == eosType) {
          end = tokenFS.getIntValue(endFeature);
          sentFS = cas.createFS(sentenceType);
          sentFS.setIntValue(startFeature, start);
          sentFS.setIntValue(endFeature, end);
          cas.getIndexRepository().addFS(sentFS);
          ++endOfSentenceCounter;
          lookForStart = true;
        }
      }
      it.moveToNext();
    }
    time = System.currentTimeMillis() - time;
    // System.out.println("Created " + endOfSentenceCounter + " sentences: " + new TimeSpan(time));
  }
 
  private static final Pattern nlPattern = Pattern.compile("(?m)(.*?$)");
  /**
   * Test driver.
   */
  public void testMain() throws Exception {

    // System.out.println("Setting up CAS.");
    // Create the initial CAS.
    long time = System.currentTimeMillis();
    time = System.currentTimeMillis() - time;
    // System.out.println("CAS set up: " + new TimeSpan(time));

    time = System.currentTimeMillis();
    // Read the document into a String. I'm sure there are better ways to
    File textFile = JUnitExtension.getFile("data/moby.txt");
    String moby = FileUtils.file2String(textFile);
    // String moby = file2String(System.getProperty("cas.data.test") + "moby.txt");
    String line;
//    BufferedReader br = new BufferedReader(new StringReader(moby));
    StringBuffer buf = new StringBuffer(10000);
    List<String> docs = new ArrayList<String>();
    Matcher m = nlPattern.matcher(moby);
    while (m.find()) {
      line = m.group();
      if (line.startsWith(".. <p")) {
        docs.add(buf.toString());
        buf.setLength(0);
      } else {
        buf.append(line + "\n");
      }
    }
//    while ((line = br.readLine()) != null) {
//      if (line.startsWith(".. <p")) {
//        docs.add(buf.toString());
//        buf = new StringBuffer();
//      } else {
//        buf.append(line + "\n");
//      }
//    }
    m.appendTail(buf);
    docs.add(buf.toString());
    buf = null;

    final int numDocs = docs.size();
    final int max = 30;
    int docCount = 0;
    long overallTime = System.currentTimeMillis();
    int numTok, numSent;
    CASSerializer cs;
    while (docCount < max) {
      for (int i = 0; i < numDocs && docCount < max; i++) {
        // System.out.println("Processing document: " + i);
        // Set document text in first CAS.
        cas.setDocumentText((String) docs.get(i));

        tokenize();
        numTok = cas.getAnnotationIndex(tokenType).size();
        assertTrue(numTok > 0);
        // System.out.println(" Number of tokens: " + numTok);

        // System.out.println("Serializing...");
        cs = Serialization.serializeCAS(cas);
        cas = Serialization.createCAS(casMgr, cs);

        assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());

        createSentences();
        numSent = cas.getAnnotationIndex(sentenceType).size();
        assertTrue(numSent > 0);
        // System.out.println(" Number of sentences: " + numSent);

        // System.out.println("Serializing...");
        cs = Serialization.serializeCAS(cas);
        cas = Serialization.createCAS(casMgr, cs);

        assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
        assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());

        // System.out.println("Serializing...");
        cs = Serialization.serializeCAS(cas);
        cas = Serialization.createCAS(casMgr, cs);

        assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
        assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
        // System.out.println(" Verify: " + numTok + " tokens, " + numSent + " sentences.");

        casMgr.reset();

        ++docCount;
      }
      // System.out.println("Number of documents processed: " + docCount);
    }
    overallTime = System.currentTimeMillis() - overallTime;
    // System.out.println("Time taken over all: " + new TimeSpan(overallTime));

  }

  /**
   * Test setCAS().
   */
  public void testSetCAS() throws Exception {

    // Read the document into a String.
    File textFile = JUnitExtension.getFile("data/moby.txt");
    String moby = FileUtils.file2String(textFile);
    // String moby = file2String(System.getProperty("cas.data.test") + "moby.txt");
    String line;
//    BufferedReader br = new BufferedReader(new StringReader(moby));
    StringBuffer buf = new StringBuffer(10000);
    List<String> docs = new ArrayList<String>();
    Matcher m = nlPattern.matcher(moby);
    while (m.find()) {
      line = m.group();
      if (line.startsWith(".. <p")) {
        docs.add(buf.toString());
        buf.setLength(0);
      } else {
        buf.append(line + "\n");
      }
    }
   
//    while ((line = br.readLine()) != null) {
//      if (line.startsWith(".. <p")) {
//        docs.add(buf.toString());
//        buf = new StringBuffer();
//      } else {
//        buf.append(line + "\n");
//      }
//    }
//    docs.add(buf.toString());
    m.appendTail(buf);
    docs.add(buf.toString());
    buf = null;

    final int numDocs = docs.size();
    final int max = 30;
    int docCount = 0;
    long overallTime = System.currentTimeMillis();
    int numTok, numSent;
    while (docCount < max) {
      for (int i = 0; i < numDocs && docCount < max; i++) {
        // System.out.println("Processing document: " + i);
        // Set document text in first CAS.
        cas.setDocumentText((String) docs.get(i));

        tokenize();
        numTok = cas.getAnnotationIndex(tokenType).size();
        assertTrue(numTok > 0);
        // System.out.println(" Number of tokens: " + numTok);

        // System.out.println("Serializing...");
        // CASMgr casMgr = CASFactory.createCAS();
        // casMgr.setCAS(cas);
        // cas = (CAS) casMgr.getCAS();
        /* setCAS is no longer used or implemented
         * You cannot use this method to set up a new cas with a copy of
         * the contents of another cas, including its indexes
        CASMgr realCasMgr = CASFactory.createCAS(cas.getTypeSystem());
        realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
        cas = ((CASImpl) realCasMgr).getCurrentView();
        casMgr = (CASMgr) cas;
        */

        assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());

        createSentences();
        numSent = cas.getAnnotationIndex(sentenceType).size();
        assertTrue(numSent > 0);
        // System.out.println(" Number of sentences: " + numSent);

        // System.out.println("Serializing...");
        // casMgr = CASFactory.createCAS();
        // casMgr.setCAS(cas);
        // cas = (CAS) casMgr.getCAS();
        /* setCAS is no longer used or implemented
         * You cannot use this method to set up a new cas with a copy of
         * the contents of another cas, including its indexes
        realCasMgr = CASFactory.createCAS();
        realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
        cas = ((CASImpl) realCasMgr).getCurrentView();
        casMgr = (CASMgr) cas;
        */

        assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
        assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());

        // System.out.println("Serializing...");
        // casMgr = CASFactory.createCAS();
        // casMgr.setCAS(cas);
        // cas = (CAS) casMgr.getCAS();
        /* setCAS is no longer used or implemented
         * You cannot use this method to set up a new cas with a copy of
         * the contents of another cas, including its indexes
        realCasMgr = CASFactory.createCAS();
        realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
        cas = ((CASImpl) realCasMgr).getCurrentView();
        casMgr = (CASMgr) cas;
        */

        assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
        assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
        // System.out.println(" Verify: " + numTok + " tokens, " + numSent + " sentences.");

        casMgr.reset();

        ++docCount;
      }
      // System.out.println("Number of documents processed: " + docCount);
    }
    overallTime = System.currentTimeMillis() - overallTime;
    // System.out.println("Time taken over all: " + new TimeSpan(overallTime));

  }


  /** Test basic blob serialization
   */
  public void testBlob() throws Exception {

    /*
     * Test that FS, indexes and strings work after repeated blob serialization
     * For each iteration, add two new FS, serialize and test all created so
     * The first FS sets the string feature using standard API => goes into stringlist
     * The second FS sets the string feature using lowlevel API => goes into stringheap
     *
     * Throw in tests of the byte, short and long heaps as well
     *
     */
  String testString = "testString";
  cas.reset();
  LowLevelCAS ll_cas = cas.getLowLevelCAS();
  FSIndexRepository ir = cas.getIndexRepository();
  int ll_strfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theStringFeature);
  int ll_bytefeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theByteFeature);
  int ll_shortfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theShortFeature);
  int ll_bytearrayfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theByteArrayFeature);
  int ll_shortarrayfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theShortArrayFeature);
  int ll_longfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theLongFeature);
 
  for (int cycle=0; cycle<10; cycle+=2) {
    FeatureStructure newFS1 = cas.createFS(theTypeType);
    ir.addFS(newFS1);
    newFS1.setIntValue(startFeature, cycle);
    newFS1.setIntValue(endFeature, cycle+1);
    // set string using normal string feature create
    newFS1.setStringValue(theStringFeature, testString);
    newFS1.setByteValue(theByteFeature, (byte)cycle);
    newFS1.setShortValue(theShortFeature, (short)cycle);
    newFS1.setLongValue(theLongFeature, (long)cycle);
    ByteArrayFS newBA1 = cas.createByteArrayFS(1);
    ShortArrayFS newSA1 = cas.createShortArrayFS(1);
    newBA1.set(0, (byte)cycle);
    newSA1.set(0, (short)cycle);
    newFS1.setFeatureValue(theByteArrayFeature, newBA1);
    newFS1.setFeatureValue(theShortArrayFeature, newSA1);

    FeatureStructure newFS2 = cas.createFS(theTypeType);
    ByteArrayFS newBA2 = cas.createByteArrayFS(1);
    ShortArrayFS newSA2 = cas.createShortArrayFS(1);
    newFS2.setIntValue(startFeature, cycle+1);
    newFS2.setIntValue(endFeature, cycle+2);
    ir.addFS(newFS2);
    // set string using lowlevel string create API
    final int llfs2 = ll_cas.ll_getFSRef(newFS2);
    final int llba2 = ll_cas.ll_getFSRef(newBA2);
    final int llsa2 = ll_cas.ll_getFSRef(newSA2);
    ll_cas.ll_setCharBufferValue(llfs2, ll_strfeatcode,
            testString.toCharArray(), 0, testString.length());
    ll_cas.ll_setByteValue(llfs2, ll_bytefeatcode, (byte)(cycle+1));
    ll_cas.ll_setShortValue(llfs2, ll_shortfeatcode, (short)(cycle+1));
    ll_cas.ll_setLongValue(llfs2, ll_longfeatcode, (long)(cycle+1));
    ll_cas.ll_setByteArrayValue(llba2, 0, (byte)(cycle+1));
    ll_cas.ll_setShortArrayValue(llsa2, 0, (short)(cycle+1));
    newFS2.setFeatureValue(theByteArrayFeature, newBA2);
    newFS2.setFeatureValue(theShortArrayFeature, newSA2);

    ByteArrayOutputStream fos = new ByteArrayOutputStream();
    Serialization.serializeCAS(cas, fos);
      cas.reset();
    ByteArrayInputStream fis = new ByteArrayInputStream(fos.toByteArray());
    Serialization.deserializeCAS(cas, fis);

    FSIndex<AnnotationFS> idx = cas.getAnnotationIndex(theTypeType);
    FSIterator<AnnotationFS> iter = idx.iterator();
    for (int tc=0; tc<cycle+1; tc++) {
      FeatureStructure testFS = iter.get();
      iter.moveToNext();
      assertTrue(tc == testFS.getIntValue(startFeature));
      assertTrue(testString.equals(testFS.getStringValue(theStringFeature)));
      assertTrue(tc == testFS.getByteValue(theByteFeature));
      assertTrue(tc == testFS.getShortValue(theShortFeature));
      assertTrue(tc == testFS.getLongValue(theLongFeature));
      ByteArrayFS ba = (ByteArrayFS)testFS.getFeatureValue(theByteArrayFeature);
      assertTrue(tc == ba.get(0));
      ShortArrayFS sa = (ShortArrayFS)testFS.getFeatureValue(theShortArrayFeature);
      assertTrue(tc == sa.get(0));
    }
    } 
  }

  public void testDeltaBlobSerialization() throws Exception {
   try {
      CAS cas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
              indexes);
      CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
              indexes);
      CAS cas3 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
              indexes);
     
      Type personType = cas1.getTypeSystem().getType(
          "org.apache.uima.testTypeSystem.Person");
      Feature componentIdFeat = personType.getFeatureByBaseName("componentId");
      Feature confidenceFeat = personType.getFeatureByBaseName("confidence");
      Type orgType = cas1.getTypeSystem().getType(
      "org.apache.uima.testTypeSystem.Organization");
      Type ownerType = cas1.getTypeSystem().getType(
                  "org.apache.uima.testTypeSystem.Owner");
      Type entityAnnotType = cas1.getTypeSystem().getType(
    "org.apache.uima.testTypeSystem.EntityAnnotation");
      Feature mentionTypeFeat = entityAnnotType.getFeatureByBaseName("mentionType");
      Feature argsFeat = ownerType.getFeatureByBaseName("relationArgs");
      Type relArgsType = cas1.getTypeSystem().getType(
                  "org.apache.uima.testTypeSystem.BinaryRelationArgs");
      Feature domainFeat = relArgsType.getFeatureByBaseName("domainValue");
      Feature rangeFeat = relArgsType.getFeatureByBaseName("rangeValue");
     
      Type entityType = cas1.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
      Feature classesFeat = entityType.getFeatureByBaseName("classes");
      Feature linksFeat = entityType.getFeatureByBaseName("links");
      Feature canonicalFormFeat = entityType.getFeatureByBaseName("canonicalForm");
     
      Type nonEmptyFsListType = cas1.getTypeSystem().getType(CAS.TYPE_NAME_NON_EMPTY_FS_LIST);
      Type emptyFsListType = cas1.getTypeSystem().getType(CAS.TYPE_NAME_EMPTY_FS_LIST);
      Feature headFeat = nonEmptyFsListType.getFeatureByBaseName("head");
      Feature tailFeat = nonEmptyFsListType.getFeatureByBaseName("tail");
     
      //cas1
      //initial set of feature structures
      // set document text for the initial view and create Annotations
      cas1.setDocumentText("This is a test document in the initial view");
      AnnotationFS anAnnot1 = cas1.createAnnotation(cas1.getAnnotationType(), 0, 4);
      cas1.getIndexRepository().addFS(anAnnot1);
      AnnotationFS anAnnot2 = cas1.createAnnotation(cas1.getAnnotationType(), 5, 6);
      cas1.getIndexRepository().addFS(anAnnot2);
      AnnotationFS anAnnot3 = cas1.createAnnotation(cas1.getAnnotationType(), 8, 13);
      cas1.getIndexRepository().addFS(anAnnot3);
      AnnotationFS anAnnot4 = cas1.createAnnotation(cas1.getAnnotationType(), 15, 30);
      cas1.getIndexRepository().addFS(anAnnot4);
      FSIndex<AnnotationFS> tIndex = cas1.getAnnotationIndex();
      assertTrue(tIndex.size() == 5); //doc annot plus 4 annots
     
      FeatureStructure entityFS = cas1.createFS(entityType);
      cas1.getIndexRepository().addFS(entityFS);
     
      StringArrayFS strArrayFS = cas1.createStringArrayFS(5);
      strArrayFS.set(0, "class1");
      entityFS.setFeatureValue(classesFeat, strArrayFS);
     
      //create listFS and set the link feature
      FeatureStructure emptyNode = cas1.createFS(emptyFsListType);
      FeatureStructure secondNode = cas1.createFS(nonEmptyFsListType);
      secondNode.setFeatureValue(headFeat, anAnnot2);
      secondNode.setFeatureValue(tailFeat, emptyNode);
      FeatureStructure firstNode = cas1.createFS(nonEmptyFsListType);
      firstNode.setFeatureValue(headFeat, anAnnot1);
      firstNode.setFeatureValue(tailFeat, secondNode);
      entityFS.setFeatureValue(linksFeat, firstNode);
     
      // create a view w/o setting document text
      CAS view1 = cas1.createView("View1");
     
      // create another view
      CAS preexistingView = cas1.createView("preexistingView");
      String preexistingViewText = "John Smith blah blah blah";
      preexistingView.setDocumentText(preexistingViewText);
      AnnotationFS person1Annot = createPersonAnnot(preexistingView, 0, 10);
      person1Annot.setStringValue(componentIdFeat, "deltacas1");
      AnnotationFS person2Annot = createPersonAnnot(preexistingView, 0, 5);
      AnnotationFS orgAnnot = preexistingView.createAnnotation(orgType, 16, 24);
      preexistingView.addFsToIndexes(orgAnnot);
     
      AnnotationFS ownerAnnot = preexistingView.createAnnotation(ownerType, 0, 24);
      preexistingView.addFsToIndexes(ownerAnnot);
      FeatureStructure relArgs = cas1.createFS(relArgsType);
      relArgs.setFeatureValue(domainFeat, person1Annot);
      ownerAnnot.setFeatureValue(argsFeat, relArgs);
     
      //serialize complete
      ByteArrayOutputStream fos = new ByteArrayOutputStream();
      Serialization.serializeCAS(cas1, fos);

      //deserialize into cas2
      ByteArrayInputStream fis = new ByteArrayInputStream(fos.toByteArray());
      Serialization.deserializeCAS(cas2, fis);
      CasComparer.assertEquals(cas1, cas2);
      //=======================================================================
      //create Marker, add/modify fs and serialize in delta xmi format.
      Marker marker = cas2.createMarker();
      FSIndex<AnnotationFS> cas2tIndex = cas2.getAnnotationIndex();
      CAS cas2preexistingView = cas2.getView("preexistingView");
      FSIndex<AnnotationFS> cas2personIndex = cas2preexistingView.getAnnotationIndex(personType);
      FSIndex<AnnotationFS> cas2orgIndex = cas2preexistingView.getAnnotationIndex(orgType);
      FSIndex<AnnotationFS> cas2ownerIndex = cas2preexistingView.getAnnotationIndex(ownerType);
     
      // create an annotation and add to index
      AnnotationFS cas2anAnnot5 = cas2.createAnnotation(cas2.getAnnotationType(), 6, 8);
      cas2.getIndexRepository().addFS(cas2anAnnot5);
      assertTrue(cas2tIndex.size() == 6); // prev annots and this new one
     
      // set document text of View1
      CAS cas2view1 = cas2.getView("View1");
      cas2view1.setDocumentText("This is the View1 document.");
      //create an annotation in View1
      AnnotationFS cas2view1Annot = cas2view1.createAnnotation(cas2.getAnnotationType(), 1, 5);
      cas2view1.getIndexRepository().addFS(cas2view1Annot);
      FSIndex<AnnotationFS> cas2view1Index = cas2view1.getAnnotationIndex();
      assertTrue(cas2view1Index.size() == 2); //document annot and this annot
    
      //modify an existing annotation
      Iterator<AnnotationFS> tIndexIter = cas2tIndex.iterator();
      AnnotationFS docAnnot = tIndexIter.next(); //doc annot
      AnnotationFS modAnnot1 = tIndexIter.next();
      AnnotationFS delAnnot = tIndexIter.next();
     
      //modify language feature
      Feature languageF = cas2.getDocumentAnnotation().getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_LANGUAGE);
      docAnnot.setStringValue(languageF, "en");
    
      //index update - reindex
      cas2.getIndexRepository().removeFS(modAnnot1);
      Feature endF = cas2.getAnnotationType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
      modAnnot1.setIntValue(endF, 4);
      cas2.getIndexRepository().addFS(modAnnot1);
      //index update - remove annotation from index
      cas2.getIndexRepository().removeFS(delAnnot);
 
      //modify FS - string feature and FS feature.
      Iterator<AnnotationFS> personIter = cas2personIndex.iterator();    
      AnnotationFS cas2person1 = (AnnotationFS) personIter.next();
      AnnotationFS cas2person2 = (AnnotationFS) personIter.next();
     
      cas2person1.setFloatValue(confidenceFeat, (float) 99.99);
      cas2person1.setStringValue(mentionTypeFeat, "FULLNAME");
     
      cas2person2.setStringValue(componentIdFeat, "delataCas2");
      cas2person2.setStringValue(mentionTypeFeat, "FIRSTNAME");
     
      Iterator<AnnotationFS> orgIter = cas2orgIndex.iterator();
      AnnotationFS cas2orgAnnot = (AnnotationFS) orgIter.next();
      cas2orgAnnot.setStringValue(mentionTypeFeat, "ORGNAME");
     
      //modify FS feature
      Iterator<AnnotationFS> ownerIter = cas2ownerIndex.iterator();
      AnnotationFS cas2ownerAnnot = (AnnotationFS) ownerIter.next();
      FeatureStructure cas2relArgs = cas2ownerAnnot.getFeatureValue(argsFeat);
      cas2relArgs.setFeatureValue(rangeFeat, cas2orgAnnot);
    
    //Test modification of a nonshared multivalued feature.
      //This should serialize the encompassing FS.
      Iterator<FeatureStructure> iter = cas2.getIndexRepository().getIndex("testEntityIndex").iterator();
      FeatureStructure cas2EntityFS = iter.next();
      StringArrayFS cas2strarrayFS = (StringArrayFS) cas2EntityFS.getFeatureValue(classesFeat);
      cas2strarrayFS.set(1, "class2");
      cas2strarrayFS.set(2, "class3");
      cas2strarrayFS.set(3, "class4");
      cas2strarrayFS.set(4, "class5");
    
      //add to FSList
      FeatureStructure cas2linksFS = cas2EntityFS.getFeatureValue(linksFeat);
      FeatureStructure cas2secondNode = cas2linksFS.getFeatureValue(tailFeat);
      FeatureStructure cas2emptyNode = cas2secondNode.getFeatureValue(tailFeat);
      FeatureStructure cas2thirdNode = cas2.createFS(nonEmptyFsListType);
      cas2thirdNode.setFeatureValue(headFeat, cas2anAnnot5);
      cas2thirdNode.setFeatureValue(tailFeat, cas2emptyNode);
      cas2secondNode.setFeatureValue(tailFeat, cas2thirdNode);
     
      // serialize cas2 in delta format
      ByteArrayOutputStream fosDelta = new ByteArrayOutputStream();
      Serialization.serializeCAS(cas2, fosDelta, marker);
     
      //======================================================================
      //deserialize delta xmi into cas1
      ByteArrayInputStream fisDelta = new ByteArrayInputStream(fosDelta.toByteArray());
      Serialization.deserializeCAS(cas1, fisDelta);
     
      //======================================================================
      //serialize complete cas and deserialize into cas3 and compare with cas1.
      ByteArrayOutputStream fosFull = new ByteArrayOutputStream();
      Serialization.serializeCAS(cas2, fosFull);
      ByteArrayInputStream fisFull = new ByteArrayInputStream(fosFull.toByteArray());
      Serialization.deserializeCAS(cas3, fisFull);
      CasComparer.assertEquals(cas1, cas3);
      //System.out.println("CAS1 " + serialize(cas1, new XmiSerializationSharedData()));
      //System.out.println("CAS2 " + serialize(cas2, new XmiSerializationSharedData()));
       
  } catch (Exception e) {
        JUnitExtension.handleException(e);
  }
  }
 
  public void testDeltaBlobWithInvalidMarker() throws Exception {
    try {
       CAS cas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
               indexes);
       boolean serfailed = false;
       Marker mark1 = cas1.createMarker();
//       Marker mark2 = cas1.createMarker();  // multiple markers not supported, tested in other test case
      
       cas1.reset();
      
       try {
         ByteArrayOutputStream fos = new ByteArrayOutputStream();
         Serialization.serializeCAS(cas1, fos, mark1);
       } catch (CASRuntimeException e) {
         serfailed = true;
       }
       assertTrue(serfailed);
      
//       serfailed = false;
//       try {
//         ByteArrayOutputStream fos = new ByteArrayOutputStream();
//         Serialization.serializeCAS(cas1, fos, mark2);
//       } catch (CASRuntimeException e) {
//         serfailed = true;
//       }
//       assertTrue(serfailed);
    } catch (Exception e) {
      JUnitExtension.handleException(e);
    }
  }

  private AnnotationFS createPersonAnnot(CAS cas, int begin, int end) {
  Type personType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
  AnnotationFS person = cas.createAnnotation(personType, begin, end);
  cas.addFsToIndexes(person);
  return person;
  }
  public static void main(String[] args) {
    junit.textui.TestRunner.run(SerializationReinitTest.class);
  }

}
TOP

Related Classes of org.apache.uima.cas.test.SerializationReinitTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.