// Package com.extentech.formats.XLS
//
// Source Code of com.extentech.formats.XLS.Sst$SstArrayList

/*
* --------- BEGIN COPYRIGHT NOTICE ---------
* Copyright 2002-2012 Extentech Inc.
* Copyright 2013 Infoteria America Corp.
*
* This file is part of OpenXLS.
*
* OpenXLS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* OpenXLS is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with OpenXLS.  If not, see
* <http://www.gnu.org/licenses/>.
* ---------- END COPYRIGHT NOTICE ----------
*/
package com.extentech.formats.XLS;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;

import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserFactory;

import com.extentech.ExtenXLS.WorkBookHandle;
import com.extentech.formats.OOXML.OOXMLConstants;
import com.extentech.formats.OOXML.Ss_rPr;
import com.extentech.toolkit.*;


/**
* <b>Sst: Shared String Table 0xFCh</b><br>
*
* Sst records contain a table of Strings, possibly spanning multiple Continue
* records.
*
* <pre>
*     offset  name        size    contents
*     ---
*     4       cstTotal    4       Total number of strings in this and the
*                                 EXTSST record.
*     8       cstUnique   4       Number of unique strings in this table.
*     12      rgb         var     Array of unique strings
* </pre>
*
* @see Sst
* @see Labelsst
* @see Extsst
*/
public final class Sst extends com.extentech.formats.XLS.XLSRecord {

  /**
   * serialVersionUID
   */
  private static final long serialVersionUID = 6966063306230877101L;
  // total string count (cstTotal); read from data bytes 0-3 in init(Sst), -1 until then
  private int cstTotal = -1;
  // unique string count (cstUnique); read from data bytes 4-7 in init(Sst), -1 until then
  private int cstUnique = -1;
  // NOTE(review): not referenced in the visible part of this file -- confirm it is still used
  private int boundincrement = 0;

  // continue handling
  // number of Continue records attached to this Sst; set by initContinues()
  private int numconts = -1;
  // boundary offsets used while parsing: [0] is this record's original size,
  // [i] is the adjusted end offset of the i-th Continue (see initContinues())
  private int[] boundaries = null;
  // grbit byte of each Continue, in order; filled by initContinues()
  private byte[] grbits = null;
  // the string table: Unicodestrings are stored here via putString()
  private List stringvector = new SstArrayList();
  // NOTE(review): only cleared/reset in close() within the visible code -- presumably tracks duplicate entries; confirm
  private HashSet dupeSstEntries = new HashSet();
  // NOTE(review): only cleared/reset in close() within the visible code -- presumably tracks entries already present; confirm
  private HashSet existingSstEntries = new HashSet();
  // the Extsst record paired with this Sst (see setExtsst/getExtsst)
  private Extsst myextsst = null;
  // record length captured at the start of init(Sst)
  int origsstlen = 0;

  int getOrigSstLen() {
    return origsstlen;
  }

  // the Continue record currently being read from while parsing strings
  Continue thiscont = null;
  // this record's length plus the payload length of every Continue (computed in initContinues())
  int datalen = -1;
  // index into boundaries[] (and the continues list) of the boundary currently being parsed against
  int currbound = 0;

  // NOTE(review): not referenced in the visible part of this file -- confirm it is still used
  byte[] deldata = null;

  public int getRealOriginalSize() {
    return this.originalsize;
  }

  public void setData(byte[] b) {
    if (data == null) {
      this.originalsize = b.length;
    }
    super.setData(b);
  }

  /**
   * set the Extsst rec for this Sst
   */
  void setExtsst(Extsst e) {
    this.myextsst = e;
  }

  Extsst getExtsst() {
    return this.myextsst;
  }

  /**
   * removes all existing Continues from the Sst
   */
  public void removeContinues() {
    super.removeContinues();
    this.continues = null;
    thiscont = null;
  }

  /**
   * initialize the Continue Handling counters
   */
  private void initContinues() {
    // create the array of record boundary offsets
    // this allows us to detect spanning UNICODE strings in CONTINUE
    // records...
    datalen = this.getLength();
    numconts = this.getContinueVect().size();
    boundaries = new int[numconts + 1];
    int thisbound = 0, ir = 0;
    grbits = new byte[numconts];
    // continues = this.getContinueVect();
    Iterator it = this.continues.iterator();
    while (it.hasNext()) {
      Continue ci = (Continue) it.next();
      // byte[] b = ci.getData(); // REMOVE
      grbits[ir++] = ci.getGrbit();
      this.getStreamer().removeRecord(ci); // remove existing continues
                          // from stream
    }
    thisbound = this.getRealOriginalSize();
    boundaries[0] = thisbound;
    int lastcontlen = 0;
    for (int i = 1; i < boundaries.length; i++) {
      Continue cxi = (Continue) continues.get(i - 1);
      int contlen = cxi.getLength();
      if (cxi.getHasGrbit())
        contlen--;
      thisbound += contlen;
      // if(DEBUGLEVEL > 5)Logger.logInfo( contlen + ",");
      lastcontlen += contlen;
      datalen += contlen;
      boundaries[i] = thisbound - (4 * i);
      cxi.setContinueOffset(boundaries[i - 1]);
    }
    // if(DEBUGLEVEL > 5) Logger.logInfo("");
    // for(int i = 1;i<boundaries.length;i++){
    // int contlen =continues[i-1].getLength();
    // if(DEBUGLEVEL > 5) Logger.logInfo("0x" + continues[i-1].getGrbit() +
    // ",");
    // }
    // if(DEBUGLEVEL > 5) Logger.logInfo("");
    thisbound = 0;
  }

  /**
   * because the SST comes between the BOUNDSHEET records and all BOUNDSHEET
   * BOFs, the lbPlyPos needs to change for all of them when record size
   * changes.
   */
  public boolean getUpdatesAllBOFPositions() {
    return true;
  }

  public void init() {
    if (originalsize == 0)
      originalsize = reclen;
    Sst.init(this);
  }

  /**
   * Initializes the sst as well as initializing the UnicodeStrings contained
   * within
   */
  public static void init(Sst sst) {
    sst.origsstlen = sst.getLength();
    sst.currbound = 0;
    sst.stringvector.clear();

    // init the string cache for fast access of init vals

    // get the row, col and ixfe information
    sst.cstTotal = ByteTools.readInt(sst.getByteAt(0), sst.getByteAt(1),
        sst.getByteAt(2), sst.getByteAt(3));
    sst.cstUnique = ByteTools.readInt(sst.getByteAt(4), sst.getByteAt(5),
        sst.getByteAt(6), sst.getByteAt(7));
    int strlen = 0, strpos = 8;

    if (sst.DEBUGLEVEL > 5)
      Logger.logInfo("INFO: initializing Sst: " + sst.cstTotal
          + " total Strings, " + sst.cstUnique + " unique Strings.");
    // Initialize continues records
    sst.initContinues();

    // initialize the Unicodestrings from the byte array
    for (int d = 0; d < sst.cstUnique; d++) {
      // Unicodestring values
      int numruns = 0, runlen = 0;
      // the number of formatting runs each one adds 4 bytes
      int basereclen = 3; // the base length of the ustring being created
      int cchExtRst = 0; // the length of any Extended string data
      boolean doubleByte = false; // whether this is a double-byte string
      byte grbit = 0x0;
      // the grbit tells us what kind of Unicodestring this is
      if (sst.DEBUGLEVEL > 30)
        Logger.logInfo("Initializing String: " + String.valueOf(d)
            + "/" + sst.cstTotal);
      // figure out the boundary offsets
      int offr = sst.boundaries.length;
      if (offr < 1)
        offr = 0;
      else
        offr = 1;
      if (strpos >= sst.boundaries[sst.boundaries.length - offr])
        break;
      short[] recdef = sst.getNextStringDefData(strpos);
      // get the length of the Unicodestring
      strlen = recdef[0];
      grbit = (byte) recdef[1];

      // we only want the bottom 4 bytes of the grbit, & not bit 2.. other
      // stuff is junk
      // grbit = (byte)(0xD & grbit);

      // init the string cache for fast access of init vals
      // st.initCacheBytes(strpos, 10); // commented out as it causes
      // array errors when short strings at end of continue boundary

      XLSRecord currec = sst;
      if (sst.DEBUGLEVEL > 5)
        Logger.logInfo("INFO: StrLen:" + strlen + " Strpos:" + strpos + " bound:" + sst.boundaries[sst.currbound]);

      if (strpos >= sst.boundaries[0])
        currec = sst.thiscont;
      switch (grbit) {

      case 0x1: // non-rich, double-byte string
        doubleByte = true;
        break;

      case 0x4: // non-rich, single byte string
        cchExtRst = ByteTools.readInt(currec.getByteAt(strpos + 3),
            currec.getByteAt(strpos + 4),
            currec.getByteAt(strpos + 5),
            currec.getByteAt(strpos + 6));
        basereclen = 7;
        doubleByte = false;
        break;

      case 0x5: // extended, non-rich, double-byte string
        cchExtRst = ByteTools.readInt(currec.getByteAt(strpos + 3),
            currec.getByteAt(strpos + 4),
            currec.getByteAt(strpos + 5),
            currec.getByteAt(strpos + 6));
        basereclen = 7;
        doubleByte = true;
        break;

      case 0x8: // rich single-byte UNICODE string
        numruns = ByteTools.readShort(currec.getByteAt(strpos + 3),
            currec.getByteAt(strpos + 4));
        runlen = numruns * 4;
        basereclen = 5;
        doubleByte = false;
        break;

      case 0x9: // rich double-byte UNICODE string
        numruns = ByteTools.readShort(currec.getByteAt(strpos + 3),
            currec.getByteAt(strpos + 4));
        runlen = numruns * 4;
        basereclen = 5;
        doubleByte = true;
        break;

      case 0xc: // rich single-byte eastern string
        numruns = ByteTools.readShort(currec.getByteAt(strpos + 3),
            currec.getByteAt(strpos + 4));
        cchExtRst = ByteTools.readInt(currec.getByteAt(strpos + 5),
            currec.getByteAt(strpos + 6),
            currec.getByteAt(strpos + 7),
            currec.getByteAt(strpos + 8));
        runlen = numruns * 4;
        basereclen = 9;
        doubleByte = false;
        break;

      case 0xd: // rich double-byte eastern string
        numruns = ByteTools.readShort(currec.getByteAt(strpos + 3),
            currec.getByteAt(strpos + 4));
        cchExtRst = ByteTools.readInt(currec.getByteAt(strpos + 5),
            currec.getByteAt(strpos + 6),
            currec.getByteAt(strpos + 7),
            currec.getByteAt(strpos + 8));
        runlen = numruns * 4;
        basereclen = 9;
        doubleByte = true;
        break;

      default:
        doubleByte = false;
        cchExtRst = 0;
        basereclen = 3;
        if (grbit != 0x0) {
          // if(st.DEBUGLEVEL > 10)
          Logger.logWarn("ERROR: Invalid Unicodestring grbit:"
              + String.valueOf(grbit));
        }
      }
      // create the String
      if (strlen == 0) {
        if (sst.DEBUGLEVEL > 10)
          Logger.logWarn("WARNING: Attempt to initialize Zero-length String.");
      }
      if (doubleByte)
        strlen *= 2;
      // it's a double-byte string so total size is *2
      try {
        strpos = sst.initUnicodeString(strlen, strpos, basereclen, cchExtRst, runlen, doubleByte);
       
        // Logger.logInfo("SST Currbound: " + sst.currbound +" strpos: "
        // + strpos);
        // if(st.DEBUGLEVEL > 5)Logger.logInfo("numruns: "
        // +String.valueOf(numruns)+" @"+String.valueOf(strpos)
        // +" len: " + String.valueOf(strlen) + " gr: "
        /// +String.valueOf(grbit) +
        // " base: "+String.valueOf(basereclen)+
        // " cchExtRst: "+String.valueOf(cchExtRst));
      } catch (Exception e) {
        Logger.logWarn("ERROR: Error Reading String @ " + strpos + e.toString() + " Skipping...");
        strpos += strlen + basereclen + runlen;
      }
    }
    if (sst.DEBUGLEVEL > 5)
      Logger.logInfo("Done reading SST.");
  }

  /**
   * retrieves the sst string at the location pos and returns the next
   * position
   *
   * @param ustrLen
   *            actual unicode string length, (not including formatting runs,
   *            phonetic data or double byte multiplication)
   * @param pos
   *            position in the source data buffer
   * @param ustrStart
   *            start of unicode string within single sst record
   * @param cchExtRst
   *            phonetic data length
   * @param runlen
   *            fomratting run length
   * @param doublebyte
   *            true if unicode string data is double byte (and then the size
   *            of the unicode string data array is ustrLen * 2)
   * @return
   */
  int initUnicodeString(int ustrLen, int pos, int ustrStart, int cchExtRst, int runlen, boolean doublebyte){   
        int bufferBoundary = boundaries[currbound]; // get the current boundary
        int totalStrLen = ustrStart + ustrLen + cchExtRst + runlen;    // calculate the total byte length of the unicode string
        int posEnd = pos + totalStrLen; // end position -- if > current record length must span and access next record/continues
        AtomicInteger uLen= new AtomicInteger(ustrLen); // same as ustrLen but mutable in order to allow changing value in getData method

        // begin checking string against current record buffer boundary
        if(posEnd < bufferBoundary){// string does not cross current boundary - easy! retrieve totalStringLen bytes and create unicode string
            byte[] newStringBytes = getData(uLen, pos, ustrStart, cchExtRst, runlen, doublebyte, false);
            this.initString(newStringBytes, pos, false);
            return posEnd;
        }else if(posEnd == bufferBoundary){// string is on the boundary - easy!
            if((this.numconts==0)||(this.numconts == this.contcounter)){
                if(DEBUGLEVEL > 5)Logger.logInfo("Last String in SST encountered.");
            }
            byte[] newStringBytes = getData(uLen, pos, ustrStart, cchExtRst, runlen, doublebyte, false);
            this.initString(newStringBytes, pos,false);
           
      /* "If fHighByte is 0x1 and rgb is extended with a Continue record the break
         MUST occur at the double-byte character boundary."
      */  // because we ended on a string, there is no grbit on the next continue
            if(this.continues.size() > this.currbound){
                thiscont =(Continue)this.continues.get(this.currbound);
                currbound++;
                if(thiscont.getHasGrbit()){
                    thiscont.setHasGrbit(false);
                    this.shiftBoundaries(1);
                }
            }
            return posEnd;
        }
               
        // spans or crosses the continue boundary
        byte[] newStringBytes= getData(uLen, pos, ustrStart, cchExtRst, runlen, doublebyte, true)// retrieve the bytes, accounting for spanning (true)
        this.initString(newStringBytes, pos, false);
        return pos + (uLen.intValue()+ustrStart+cchExtRst+runlen);    // in most cases should be same as pos + totalStrLen but it's possible for uLen to be changed in getData
    }

  /**
   * gets the sst string at strpos length allstrlen and returns the next
   * position
   *
   * @param allstrlen
   * @param strpos
   * @param strend
   * @param STATE
   * @return
   *
   *
   *         KSC: replacing int getString(int allstrlen, int strpos, int
   *         strend, int[] STATE){ byte[] newStringBytes = getData(allstrlen,
   *         strpos, STATE); int nextpos = strpos + allstrlen;
   *         if(STATE[SI_SPANSTATE]==Sst.STATE_EXRSTSPAN){
   *         this.initString(newStringBytes, strpos,true); }else{
   *         this.initString(newStringBytes, strpos,false); }
   *
   *         if(strend != nextpos) Logger.logWarn(
   *         "Sanity Check in Sst initUnicodeString(): strend != nextpos.");
   *
   *         return nextpos; }
   */

  /**
   * Adjust the boundary pointers based on whether we need to compensate for
   * grbit anomalies
   *
   *
   * NOT USED
   *
  void shiftBoundariesX(int x) {
    int ct = 0;
    Iterator it = continues.iterator();
    while (it.hasNext()) {
      Continue nextcont = (Continue) it.next();
      if (ct++ >= this.currbound) {
        nextcont.setContinueOffset(nextcont.getContinueOffset() + x);
        boundaries[ct] = nextcont.getContinueOffset();
        if (DEBUGLEVEL > -5)
          Logger.logInfo("Sst.shiftBoundaries() Updated " + nextcont
              + " : " + nextcont.getContinueOffset());
      }
    }
    if (boundaries.length == (this.continues.size() + 1)) {
      boundaries[this.continues.size()] += x;
    }
  }*/

  /**
   * Adjust the boundary pointers based on whether we need to compensate for
   * grbit anomalies
   *
   * */
  void shiftBoundaries(int x) {
    // int ret = 0;
    for (int t = currbound; t < this.continues.size(); t++) {
      Continue nextcont = (Continue) this.continues.get(t);
      nextcont.setContinueOffset(nextcont.getContinueOffset() + x);
      boundaries[t] = nextcont.getContinueOffset();
      if (DEBUGLEVEL > 5)
        Logger.logInfo("Sst.shiftBoundaries() Updated " + nextcont
            + " : " + nextcont.getContinueOffset());
    }
    if (boundaries.length == (this.continues.size() + 1)) {
      boundaries[this.continues.size()] += x;
    }
  }

  /**
   * Refactoring Continue data access
   *
   *
   * @param i
   * @return
   */
  short[] getNextStringDefData(int start) {
    short[] ret = { (short) 0x0, (short) 0x0 };
    try {
      // int thiscont = -1;
      int end = start + 3;
      if (end <= boundaries[0]) { // it's in the main Sst data
        ret[0] = ByteTools.readShort(getByteAt(start++),
            getByteAt(start++));
        ret[1] = (short) getByteAt(start);
        return ret;
      }

      // KSC: no need as Sst.getData increments correctly ...
      // this.thiscont = this.getContinue(end);
      byte b0 = this.thiscont.getByteAt(start++);
      byte b1 = this.thiscont.getByteAt(start++);
      byte b2 = this.thiscont.getByteAt(start++);
      ret[0] = (short) ByteTools.readShort(b0, b1);
      ret[1] = (short) b2;
    } catch (Exception e) {
      if (DEBUGLEVEL > 0)
        Logger.logWarn("possible problem parsing String table getting next string def data: "
            + e);
    }
    return ret;
  }

  /**
   * return the continue that contains up to t length
   *
   * @param t
   * @return
   */
  Continue getContinue(int t) {
    if (t - 1 == datalen) {
      return (Continue) this.continues.get(this.continues.size() - 1);
    }
    for (int x = this.boundaries.length - 1; x >= 0; x--) {
      if (t > boundaries[x]) {
        return (Continue) this.continues.get(x);
      }
    }
    return null;
  }

  /**
   * get the string data from the proper place (either this sst record, or one or more continues)
   *
   * <br>
   * NOTES: if the string spans a continue, the length in bytes of each part
   * is contained in the first two ints.
   *
   * if the record spans a Continue, we need to see if the border falls within
   * text data or extra data
   *
   * 10 len, really 15 bytes uncomp ||gr comp 2,0,1,0,2,0,3,0,3,0,||0
   * ,4,5,5,7,8
   *
   *
   * @param ustrLen    unicode string length
   * @param pos      position in buffer
   * @param ustrStart    start of unicode string part (after initial length(s))
   * @param cchExtRst    phonetic data length or 0 if none
   * @param runlen    formatting runs length or 0 if none
   * @param doublebyte  true of doublebyte
   * @param bSpans    true if spans records
   * @return byte[] defining unicode string
   */
  byte[] getData(AtomicInteger ustrLen, int pos, int ustrStart, int cchExtRst, int runlen, boolean doublebyte, boolean bSpans) {    int totalStrLen = ustrStart + ustrLen.intValue() + cchExtRst + runlen;
    int posEnd = pos + totalStrLen; // buffer end position

    if (posEnd <= boundaries[0]) { // it's in the main Sst data just grab string and return
      return this.getBytesAt(pos, totalStrLen);
    }

    // if it's in the current continues without spanning, just get the bytes and return
    if (!bSpans) { // Simple -- no Span, return data
      pos += thiscont.grbitoff;
      int thisoff = pos - thiscont.getContinueOffset();
      return thiscont.getBytesAt(thisoff, totalStrLen);
    }
   
    // if string spans two or more records, must deal with boundaries and grbits and lots of complications ...
    int bufferBoundary = boundaries[currbound]; // get the current boundary
    if (DEBUGLEVEL > 5)
      Logger.logInfo("Crossing Boundary: " + bufferBoundary
          + ".  Double-Bytes: " + doublebyte);

    // get ensuing record (previous==thiscont.predecessor)
    if ((currbound) < continues.size()) {
      thiscont = (Continue) continues.get(currbound++);
    }

    // find out where break is
    int currpos = pos + totalStrLen;
    boolean bfoundBreak = false;
    boolean bUnCompress = false; // true if string on previous boundary must be uncompressed
    boolean bUnCompress1 = false; // true if string1 must be uncompressed ** this one is confusing but works **

    // check if break is in ExtRst data (==phonetic data)
    if (cchExtRst > 0) {
      currpos -= cchExtRst;
      if (currpos <= bufferBoundary) {
        if (DEBUGLEVEL > 5)
          Logger.logInfo("Continue Boundary in ExtRst data.");
        if (thiscont.getHasGrbit()) {
          thiscont.setHasGrbit(false);
          this.shiftBoundaries(1);
        }
        bfoundBreak = true;
      }
    }

    // check if break is in formatting run data
    if (runlen > 0) {
      currpos -= runlen;
      if (!bfoundBreak && currpos <= bufferBoundary) { // check against japanese!
        if (DEBUGLEVEL > 5)
          Logger.logInfo("Continue Boundary in Formatting Run data.");
        if (thiscont.getHasGrbit()) {
          this.shiftBoundaries(1);
          thiscont.setHasGrbit(false);
        }
        bfoundBreak = true;
      }
    }

    // otherwise the break is in unicode stringdata part
    currpos = pos + ustrStart;
    if (!bfoundBreak && currpos < bufferBoundary) {
      if (ustrLen.intValue() == 0) { // a ONE BYTE String on the boundary! Add the grbit back to the Continue
        if (DEBUGLEVEL > 5)
          Logger.logInfo("1 byte length String on the Continue Boundary.");
        boundaries[boundaries.length - 1]++; // increment the last boundary...
      }
    }

    // check if break is within the actual ustring data
    if (((currpos <= bufferBoundary) && ((currpos + ustrLen.intValue()) > bufferBoundary))) { // is break within String portion// ?
      if (DEBUGLEVEL > 5)
        Logger.logInfo("Continue Boundary in String data.");
      if (!thiscont.getHasGrbit()) { // when does this happen???
        thiscont.setHasGrbit(true);
        this.shiftBoundaries(-1);
      }

      byte b = thiscont.getGrbit();
      // If it changes double --> single or single --> double then adjust accordingly (plus set bUnCompress or bUnCompress1 flags which govern how bytes are accessed)
      if (doublebyte && (b == 0x0)) { // it is in doublebytes but it really should be compressed
        int preBoundaryBytes = bufferBoundary - pos - ustrStart;
        int postBoundaryBytes = pos + ustrStart + ustrLen.intValue() - bufferBoundary;
        postBoundaryBytes = postBoundaryBytes / 2;
        ustrLen.set(preBoundaryBytes + postBoundaryBytes);
        bUnCompress1 = true; // dunno what this means but it works ...
      } else if (!doublebyte && (b == 0x1)) { // string portion on prevouos boundary should be uncompressed/converted to doublebyte
        int preBoundaryBytes = bufferBoundary - pos - ustrStart;
        int postBoundaryBytes = pos + ustrStart + ustrLen.intValue() - bufferBoundary;
        postBoundaryBytes = postBoundaryBytes * 2;
        ustrLen.set(preBoundaryBytes + postBoundaryBytes);        
        bUnCompress = true;
      }
      // ustrLen may have changed above - reset vars
      totalStrLen = ustrStart + ustrLen.intValue() + cchExtRst + runlen;
      posEnd= pos + totalStrLen;
    }

    // calculate length on current record (=thiscont.predecessor) and length on ensuing continues
    int string1ByteLength = pos - this.thiscont.getContinueOffset(); // bytes on 1st record or continue
    if (string1ByteLength < 0)
      string1ByteLength *= -1;
    int string2ByteLength = totalStrLen - string1ByteLength; // bytes on 2nd// or// ensuing// continues// ==// spanned// bytes
    int extraData = (cchExtRst + runlen); // non-string-data (phonetic info// and/or formatting runs)

    // remove ExtRst and runlen info from 2nd String length
    string2ByteLength -= extraData;
    if (string2ByteLength <= 0) {
      // if it spans we want extr just to be the bytes on the second continue
      extraData = string2ByteLength + extraData;
      string2ByteLength = 0;// all String data is contained in prior // Continue
    }

    byte[] string1bytes = null, string2bytes = null;

    // we need to expand the first section bytes to fit the last one (????)
    if (this.thiscont.predecessor instanceof Continue) {
      pos = ((this.thiscont.predecessor.getLength()) - (string1ByteLength));
      pos -= 4;
    }
    if (this.thiscont.getHasGrbit()) {
      thiscont.grbitoff = 1;
    } else {
      thiscont.grbitoff = 0;
    }

    // *********************************************************************************************************
    // handle the part of unicode string on previous continues
    if (!bUnCompress) {
      string1bytes = this.thiscont.predecessor.getBytesAt(pos, string1ByteLength);
    } else { // portion of string on previous boundary is singlebyte; ensuing portion is doublebyte; must convert previous to doublebyte
      string1bytes = convertCompressedBytesToDoubleBytes(pos, string1ByteLength, ustrStart);
    }

    // *********************************************************************************************************
    // handle part on current (and ensuing, if necessary) continues
    if (string2ByteLength < MAXRECLEN) { // 99.9% usual case
      if (!bUnCompress1) {
        string2bytes = thiscont.getBytesAt(0 + thiscont.grbitoff, string2ByteLength);
      } else { // Expand the second string bytes
        string2ByteLength *= 2;
        string2bytes = new byte[string2ByteLength];
        for (int t = 0; t < string2ByteLength / 2; t++) {
          string2bytes[(t * 2)] = this.thiscont.getByteAt(t + this.thiscont.getContinueOffset());
        }
      }
      // since we've accessed the last bytes of the prior Continue, blow
      // it out!
      if (this.thiscont.predecessor instanceof Continue) this.thiscont.predecessor.setData(null);
    } else { // string2ByteLength spans continue(s) ************************************************* see infoteria/cannotread824315.xls
      int blen = string2ByteLength;
      int idx = 0;
      int start= 0;
      string2bytes = new byte[blen];
      while (blen > 0) {        // loop thru ensuing continues until correct length is read in
        int curlen = Math.min(start+thiscont.getLength()-thiscont.grbitoff, blen);       
        if (!bUnCompress1) {
          byte[] tmp = thiscont.getBytesAt(start+thiscont.grbitoff, curlen);
          System.arraycopy(tmp, 0, string2bytes, idx, curlen);
        } else { // Expand the second string bytes - NOTE: This has not been hit so hasn't been tested ...
          curlen *= 2;
          byte[] tmp = new byte[curlen];
          for (int t = 0; t < curlen / 2; t++) {
            tmp[(t * 2)] = this.thiscont.getByteAt(t + this.thiscont.getContinueOffset());
          }
          System.arraycopy(tmp, 0, string2bytes, idx, curlen);
        }
        // since we've accessed the last bytes of the prior Continue, blow it out!
        if (this.thiscont.predecessor instanceof Continue) this.thiscont.predecessor.setData(null);
        if (curlen >= thiscont.getLength()-thiscont.grbitoff) { // finished this one, get next continue
          if ((currbound) < continues.size())
            thiscont = (Continue) continues.get(currbound++);
          if (this.thiscont.getHasGrbit())
            thiscont.grbitoff = 1;
          else
            thiscont.grbitoff = 0;
          start= 4;    // don't understand this but, hey, it works ...
        } else // we are done
          // get current length in current continues only (==start postion for extra data, if any)
          string2ByteLength= curlen+start;
          break;
        }
        idx += curlen;
        blen -= curlen;
      }
    }
   
    // ***********************************************************************************************************
    // now put together the string bytes - string1bytes and string2bytes are the ustring only, excluding extraData (formatting or phoentic data)
    byte[] returnstringbytes = new byte[string1bytes.length + string2bytes.length + extraData];
    System.arraycopy(string1bytes, 0, returnstringbytes, 0, string1bytes.length);
    System.arraycopy(string2bytes, 0, returnstringbytes, string1bytes.length, string2bytes.length);

    // does it have ExtRst or Formatting Runs?
    if (extraData > 0) {
      if (posEnd <= boundaries[currbound]) { // usual case!!
        int startpos = string2ByteLength;
        if (bUnCompress1) startpos /= 2;
        if (string2ByteLength!=0)startpos+=this.thiscont.grbitoff;    // dunno why but it works ...
        byte[] rx2 = thiscont.getBytesAt(startpos, extraData);
        System.arraycopy(rx2, 0, returnstringbytes, (string1bytes.length + string2bytes.length), extraData);
      } else // extraData spans continues ... 
        // have to get portion on prev continue and rest on next continue ... sigh ....
        int startpos = string2ByteLength; 
        if (bUnCompress1) startpos /= 2;
        byte[] rx2= new byte[extraData];
        string1ByteLength = this.thiscont.getLength()-startpos; // bytes on 1st record or continue
        string2ByteLength = extraData- string1ByteLength; // bytes on 2nd// or// ensuing// continues// ==// spanned// bytes
        if ((currbound) < continues.size()) {
          thiscont = (Continue) continues.get(currbound++);
          if (this.thiscont.getHasGrbit()) {
            thiscont.grbitoff = 1;
          } else {
            thiscont.grbitoff = 0;
          }
        }
        pos = ((this.thiscont.predecessor.getLength()) - (string1ByteLength));
        pos -= 4;
        int start= 4// why?????
        System.arraycopy(this.thiscont.predecessor.getBytesAt(pos, string1ByteLength), 0, rx2, 0, string1ByteLength);
        System.arraycopy(thiscont.getBytesAt(start + thiscont.grbitoff, string2ByteLength), 0, rx2, string1ByteLength, string2ByteLength);
        System.arraycopy(rx2, 0, returnstringbytes, (string1bytes.length + string2bytes.length), extraData);
        ustrLen.set(ustrLen.get()-1)// ???? DO NOT UNDERSTAND THIS BUT IT APPEARS TO WORK - hits on infoteria/cannotread824315.xls
      }     
    }
   
    if (DEBUGLEVEL > 23)
      Logger.logInfo("Total Length from Continue: " + returnstringbytes.length);
    return returnstringbytes;
  }

  /**
   * for rare occurrences where string portion on previous boundary is flagged singlebyte/compressed,
   * and the ensuing continue is flagged doublebyte; in these cases the unicode-string-portion on the
   * previous boundary must be converted to doublebyte
   *
   * @param pos      positon on previous boundary
   * @param totallen    total string length on previous boudary
   * @param uStrStart    start of unicode string portion
   * @return
   */
  byte[] convertCompressedBytesToDoubleBytes(int pos, int totallen, int uStrStart) {
    int uLenOnPrevious= (totallen-uStrStart)// unicode string portion on previous boundary
    byte[] converted= new byte[uStrStart + uLenOnPrevious*2];
    System.arraycopy(this.thiscont.predecessor.getBytesAt(pos, uStrStart), 0, converted, 0, uStrStart);
    byte[] ustr= this.thiscont.predecessor.getBytesAt(pos+uStrStart, uLenOnPrevious);
    converted[2]= (byte)(converted[2] | 0x1)// flag as doublebyte/uncompressed for unicode string processing
    for (int i= 0; i < uLenOnPrevious; i++) {  // copy rest of unicode string portion on prev boundary as doublebyte
      converted[uStrStart+ (i*2)]=  ustr[i];     
    }
    return converted;
   
  }

  /**
   * given unicode bytes, create a Unicodestring and add it to the string vector
   */
  Unicodestring initString(byte[] newStringBytes, int strpos,
      boolean extrstbrk) {
    // create a new Unicodestring, set its data
    Unicodestring newString = new Unicodestring();
    newString.setSSTPos(strpos);
    newString.init(newStringBytes, extrstbrk);

    // add the new String to the String table and return the new pointer
    if (DEBUGLEVEL > 5)
      Logger.logInfo(" val: " + newString.toString());
    if (newString.getLen() == 0) {
      Logger.logInfo("Adding zero-length string!");
    } else {
      this.putString(newString);
    }
    return newString;
  }

  int retpos = -1;
  private int putString(Unicodestring newString) {
    ++retpos;
    ((SstArrayList) stringvector).put((Object) newString, Integer.valueOf(
        retpos));
    return retpos;
  }

  /**
   * clear out object references in prep for closing workbook
   */
  public void close() {
    cbounds.removeAllElements();
    sstgrbits.removeAllElements();
    stringvector.clear();
    stringvector = new SstArrayList();
    dupeSstEntries.clear();
    dupeSstEntries = new HashSet();
    existingSstEntries.clear();
    existingSstEntries = new HashSet();
  }

  /**
   * call this method after changing the value of an SST Unicode string to
   * update the underlying SST byte array.
   */
  void updateUnicodestrings() {
    // TODO: OPTIMIZE: check that the Sst has changed
    // reset defaults
    cbounds = new CompatibleVector();
    sstgrbits = new CompatibleVector();
    lastwasbreakable = true;
    stringisonbound = false;
    laststringwasonbound = false;
    islast = false;
    thisbounds = WorkBookFactory.MAXRECLEN;
    lastbounds = 0;
    contcounter = 0;
    lastlen = 0;
    grbitct = 0;
    dl = 0;
    leftoverlen = 0;
    gr = 0x0;

    // loop through the strings and copy their
    // bytes to the SST byte array.
    // byte[] tmp = new byte[0];
    byte[] cstot = ByteTools.cLongToLEBytes(cstTotal);
    byte[] cstun = ByteTools.cLongToLEBytes(cstUnique);

    // TODO: OPTIMIZE!!
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try {
      out.write(cstot);
      out.write(cstun);
    } catch (IOException e) {
      Logger.logInfo("Exception getting String bytes: " + e);
    }

    if (this.stringvector.size() > 0) {
      // now get the continue boundaries
      int thispos = 8, lastpos = 0;
      cbounds.removeAllElements();
      sstgrbits.removeAllElements();
      byte[] strb = null;
      Iterator it = stringvector.iterator();
      while (it.hasNext()) {
        Object ob = it.next();
        Unicodestring str = (Unicodestring) ob;

        // from updateUnicodeStrings()
        str.setSSTPos(thispos);

        strb = str.read();
        try {
          out.write(strb);
        } catch (IOException e) {
          Logger.logInfo("Exception getting String bytes: " + e);
        }
        lastpos = thispos;
        thispos = lastpos + strb.length;
        checkOnBoundary(str, lastpos, thispos + 4, strb); // add 4
                                  // because 4
                                  // added to
                                  // boundaries
      }

      if (leftoverlen > 0) {// there was leftover data!
        cbounds.add(Integer.valueOf(leftoverlen));
        sstgrbits.add(new Byte(gr));
      }
      if (cbounds.size() > 0)
        numconts = cbounds.size() - 1;
      else
        numconts = 0;

      byte[] bb = out.toByteArray();
      if (sanityCheck(bb.length)) {
        this.setData(bb);
      } else {
        this.datalen = bb.length + 4;
        this.updateUnicodestrings();
      }

    }
    if ((DEBUGLEVEL > 15) && (cbounds != null)) {
      for (int t = 0; t < cbounds.size(); t++)
        Logger.logInfo(((Integer) cbounds.get(t)).intValue() + ",");
      Logger.logInfo("");
    }
    if ((DEBUGLEVEL > 150) && (sstgrbits != null)) {
      for (int t = 0; t < sstgrbits.size(); t++)
        Logger.logInfo("0x" + ((Byte) sstgrbits.get(t)).byteValue()
            + ",");
      Logger.logInfo("");
    }
  }

  /**
   * Checks that the continues make sense. In some cases the continue lengths
   * will be wrong due to the datalen being off. Datalen is created via
   * offsets, not absolutes, so this can occur. If so, we will reset the
   * datalen to the correct number.
   *
   * @return
   */
  private boolean sanityCheck(int realLen) {
    long contLens = 0;
    for (int i = 0; i < cbounds.size(); i++) {
      Integer intgr = (Integer) cbounds.get(i);
      contLens += intgr.intValue();
    }
    if (((datalen - 4) - contLens) > 8223) {
      if (DEBUGLEVEL > 1)
        Logger.logWarn("SST continue lengths not correct, regenerating");
      return false;
    }
    return true;
  }

  // State shared between updateUnicodestrings() (which resets most of it) and
  // checkOnBoundary() (which advances it).

  // Integer lengths, one per boundary recorded by checkOnBoundary / updateUnicodestrings
  CompatibleVector cbounds = new CompatibleVector();
  // parallel to cbounds: a Byte grbit per boundary, or null when the Continue gets no grbit
  CompatibleVector sstgrbits = new CompatibleVector();
  // flags carried from one boundary to the next by checkOnBoundary; islast is set once the
  // final segment has been reached
  boolean lastwasbreakable = true, stringisonbound = false,
      laststringwasonbound = false, islast = false;
  // thisbounds: position of the next boundary; lastbounds: position of the previous one;
  // contcounter: boundaries recorded so far; lastlen: length of the last one recorded;
  // dl: the larger of reclen and datalen; leftoverlen: length of the trailing segment
  // (grbitct is only reset in the visible code)
  int thisbounds = WorkBookFactory.MAXRECLEN, lastbounds = 0,
      contcounter = 0, lastlen = 0, grbitct = 0, dl = 0, leftoverlen = 0;
  // grbit candidate for the boundary currently being processed
  byte gr = 0x0;

  /*
   * counters used while checking Continue boundary strings; unlike the fields above
   * they are not reset by updateUnicodestrings()
   */
  // only incremented inside the disabled trace statement in checkOnBoundary
  int stringnumber = 0;
  // incremented once per boundary processed by checkOnBoundary
  int continuenumber = -1;
  // 1 when the last recorded boundary received a grbit, 0 when it received null
  int lastgrbit = 0;

  /**
   * Records every Continue boundary that the given string reaches or crosses.
   * <p>
   * While the string's end position is at or beyond the current boundary (thisbounds), one
   * length is appended to cbounds and one grbit (or null) to sstgrbits, and the boundary
   * state fields are advanced for the next iteration.
   *
   * @param str     the string just written
   * @param lastpos position at which the string starts
   * @param thispos position at which the string ends (updateUnicodestrings passes the end
   *                position plus 4)
   * @param strb    the bytes written for the string
   */
  void checkOnBoundary(Unicodestring str, int lastpos, int thispos,
      byte[] strb) {
    // disabled trace output; the stringnumber++ inside it therefore never runs
    if (false)
      Logger.logInfo("Checking Sst boundary: " + lastpos + "/" + thispos
          + ":" + thisbounds + " ContinueNumber: " + continuenumber
          + " StringNumber:" + stringnumber++ + "numboundaries"
          + cbounds.size());

    // one iteration per boundary reached by this string
    while (thispos >= (thisbounds)) {
      continuenumber++;
      if (DEBUGLEVEL > 5)
        Logger.logInfo(String.valueOf(thisbounds));

      // check whether the string can safely be split
      boolean breaksok = str.isBreakable(thisbounds);
      int contlen = 0;

      // get the Continue grbit
      gr = strb[0]; // default is a non-grbit -- if it doesn't break, we
              // don't want it

      // a breakable string supplies the real Continue grbit (0 or 1)
      if ((breaksok))
        gr = this.getContinueGrbitFromString(str);
      if (DEBUGLEVEL > 5)
        Logger.logInfo(" String @: " + thispos + " is breakable: "
            + breaksok);

      // deal with string break subtleties
      contlen = WorkBookFactory.MAXRECLEN; // the default
      if (islast) {
        // final segment: use the leftover length computed on the previous pass, plus one,
        // plus one more when the previous boundary was not breakable
        contlen = leftoverlen;
        leftoverlen = 0;
        contlen++;
        if (!lastwasbreakable)
          contlen++;
      } else if (!breaksok) {
        // string cannot be split: the segment ends where this string starts
        stringisonbound = true;
        contlen = lastpos - lastbounds;
        lastbounds = lastpos;
      } else {
        // check if it's double byte, if so, make sure that the break is
        // not in the middle of a character.
        if (breaksok && (gr == 1)) {
          if (str.charBreakOnBounds(thisbounds + lastgrbit)) {
            contlen--;
          }
        }
      }

      // set continue length
      if ((!laststringwasonbound) && lastwasbreakable
          && (contcounter > 0)) { // normal w/grbit
        if (!breaksok) {
          cbounds.add(Integer.valueOf(contlen));
        } else {
          // breakable string following a breakable boundary: record one byte less and,
          // unless this is the final segment, pull the boundary back by one as well
          if (!islast)
            thisbounds--;
          cbounds.add(Integer.valueOf(contlen - 1));
        }
      } else {
        cbounds.add(Integer.valueOf(contlen));
      }

      // set grbit add null if the Continue should not have a grbit
      if (str.cch < 2) {
        sstgrbits.add(null);
        lastgrbit = 0;
      } else if (!breaksok && ((gr < 0x2) && (gr >= 0x0))) {
        sstgrbits.add(null);
        lastgrbit = 0;
      } else {
        sstgrbits.add(new Byte(gr));
        lastgrbit = 1;
      }

      contcounter++;

      // reset stuff
      lastwasbreakable = breaksok;
      laststringwasonbound = stringisonbound;
      stringisonbound = false;

      lastlen = contlen;
      if (breaksok)
        lastbounds = thisbounds;

      // datalen will be smaller than reclen
      // if continues were not created
      if (reclen > datalen)
        dl = this.reclen;
      else
        dl = this.datalen;
      // 20060518 KSC: handle segments that fall between the extra 4 added
      // to the boundary ...
      if ((thisbounds + contlen + 4) < dl) { // not the last one
        thisbounds += contlen;
        lastpos += contlen; // 20090407 KSC: If !breaksok but still
                  // loops, must increment lastpos or infinite
                  // loops [BUGTRACKER 2355 Infoteria OOM]
      } else if (!islast) {
        // the next segment is the final one: remember its length and move the boundary to
        // the end of the data
        leftoverlen = dl + 4 - lastlen;
        if (!lastwasbreakable && (leftoverlen > 0))
          leftoverlen++;
        thisbounds = dl;
        islast = true;
      } else {
        thisbounds += contlen;
      }

    }
  }

  /**
   * This returns the Continue record grbit which is either 0 or 1 -- NOT the
   * string's grbit which determines much more...
   */
  byte getContinueGrbitFromString(Unicodestring str) {
    byte grb = 0x0;
    switch (str.getGrbit()) {
    case 0x1:
      grb = 0x1;
      break;
    case 0x5:
      grb = 0x1;
      break;
    case 0x9:
      grb = 0x1;
      break;
    case 0xd:
      grb = 0x1;
      break;
    default:
    }
    return grb;
  }

  Object[] continueDef;

  /**
   * return the sizes of Continue records for an Sst caches the read if
   * neccesary
   */
  public static Object[] getContinueDef(Sst rec, boolean cached) {
    if (cached) {
      return rec.continueDef;
    } else {
      Integer[] cbs = new Integer[rec.cbounds.size()];
      Byte[] sstgrs = new Byte[rec.sstgrbits.size()];
      for (int t = 0; t < cbs.length; t++) {
        cbs[t] = ((Integer) rec.cbounds.get(t));
      }

      for (int t = 0; t < sstgrs.length; t++) {
        sstgrs[t] = ((Byte) rec.sstgrbits.get(t));
      }

      rec.continueDef = new Object[2];
      rec.continueDef[0] = cbs;
      rec.continueDef[1] = sstgrs;
      return rec.continueDef;
    }

  }

  /**
   * Called from LabelSst on initialization from a new workbook, this
   * pre-populates the list of strings that are currently shared.
   */
  void initSharingOnStrings(int isst) {
    Integer iSst = Integer.valueOf(isst);
    if (existingSstEntries.contains(iSst)) {
      if (!dupeSstEntries.contains(iSst)) { // really is just a switch -doesn't track # times string is shared ...
        dupeSstEntries.add(iSst);
      }
    } else {
      existingSstEntries.add(iSst);
    }
  }

  // Optimization -- don't check UStr on add
  int STRING_ENCODING_MODE = Sst.STRING_ENCODING_UNICODE;

  public void setStringEncodingMode(int mode) {
    this.STRING_ENCODING_MODE = mode;
  }

  /**
   * remove a Unicodestring from the table
   *
   * @param idx
   */
  void removeUnicodestring(Unicodestring str) {
    this.stringvector.remove(idx);
    this.retpos--;
    this.reclen -= str.getLen();
  }

  /**
   * used when modifying existing sst entry update data + rec lens
   *
   * @param delta
   *            amt of adjustment
   */
  void adjustSstLength(int delta) {
    this.reclen += delta;
    this.datalen += delta;
  }

  /**
   * insert a new Unicodestring into the array of strings composing this
   * String Table
   */
  int insertUnicodestring(Unicodestring us) {
    int retpos = -1;
    cstTotal++;
    boolean isuni = false;
    // get the existing position of this string
    // but only if we're not ignoring dupes
    if (this.getWorkBook().isSharedupes()) {
      retpos = ((SstArrayList) stringvector).find(us); // indexOf will not
                                // match entire
                                // unicode
                                // string
                                // (including
                                // formatting)
    }
    if (retpos == -1) { // unicode string isn't in yet
      cstUnique++;
      int strlen = us.getLen();
      reclen += strlen + (us.isRichString() ? 5 : 3);
      datalen += strlen + (us.isRichString() ? 5 : 3);
      if (isuni) {
        reclen += strlen; // utf double encoding.
        datalen += strlen;
      }
      retpos = this.putString(us);
    } else {
      // this is a duplicate string, track it!
      dupeSstEntries.add(Integer.valueOf(retpos));
    }

    return retpos;
  }

  /**
   * create a new unicode string from string and formatting information and
   * add it to the Sst string array formatting runs, if present, contain list
   * of short[] {char index, font index} where char index is start index in
   * the string to apply font at font index
   *
   * @param s
   * @param formattingRuns
   * @return
   */
  int addUnicodestring(String s, ArrayList formattingRuns) {
    cstTotal++;
    cstUnique++;
    Unicodestring str = createUnicodeString(s, formattingRuns,
        STRING_ENCODING_MODE);

    reclen += str.getLen();
    datalen += str.getLen();
    retpos = this.putString(str);
    return retpos;
  }

  /**
   * Creates a Unicodestring from text and optional formatting runs.
   * <p>
   * The byte layout assembled here is: 2 bytes length, 1 byte grbit, then (only when
   * formatting runs are given) 2 bytes run count, then the character bytes, then 4 bytes
   * per formatting run (char index, font index).
   *
   * @param s              the text
   * @param formattingRuns list of short[] {char index, font index}, or null for none
   * @param ENCODINGMODE   one of the WorkBook.STRING_ENCODING_* constants
   * @return the new string, or null when the default encoding is not supported
   */
  public static Unicodestring createUnicodeString(String s,
      ArrayList formattingRuns, int ENCODINGMODE) {
    try {
      // decide between compressed and double-byte storage
      boolean isuni = false;
      if (ENCODINGMODE == WorkBook.STRING_ENCODING_AUTO)
        isuni = ByteTools.isUnicode(s);
      else if (ENCODINGMODE == WorkBook.STRING_ENCODING_COMPRESSED)
        isuni = false;
      else if (ENCODINGMODE == WorkBook.STRING_ENCODING_UNICODE)
        isuni = true;
      // formatting runs always force double-byte storage
      if (formattingRuns != null)
        isuni = true;

      byte[] charbytes = s.getBytes(XLSConstants.DEFAULTENCODING);
      // NOTE(review): the length written below is the byte count under DEFAULTENCODING, not
      // s.length() -- confirm these agree for the strings expected here
      int strlen = charbytes.length; // .length();
      byte[] strbytes = null;

      // handle string sizes
      if (strlen * 2 > Short.MAX_VALUE)
        isuni = false; // can't fit larger than Short String length
      // if strlen is greater than the maximum value for excel cells, truncate
      if (strlen > (Short.MAX_VALUE - 3)) {
        strlen = Short.MAX_VALUE - 3; // maximum value
        charbytes = new byte[strlen];
        System.arraycopy(s.getBytes(XLSConstants.DEFAULTENCODING), 0,
            charbytes, 0, strlen);
      }

      // re-applied because the size check above may have cleared the flag
      // NOTE(review): in that case charbytes is re-encoded from the full string below and is
      // no longer truncated, while strlen still is -- confirm
      if (formattingRuns != null)
        isuni = true;

      if (isuni) { // encode string bytes
        try {// if you use a string here for the encoding rather than a
            // reference to a static String, performance in JDK 4.2
            // will suffer. Why? Dunno, but it's bad!
          charbytes = s.getBytes(WorkBook.UNICODEENCODING);
        } catch (UnsupportedEncodingException e) {
          // charbytes keeps the default-encoded bytes in this case
          Logger.logWarn("error encoding string: " + e
              + " with default encoding 'UnicodeLittleUnmarked'");
        }
        if (formattingRuns == null)
          strbytes = new byte[charbytes.length + 3];
        else
          // need 2 extra bytes to store formatting run info
          strbytes = new byte[charbytes.length + 5];
      } else
        strbytes = new byte[charbytes.length + 3];

      // given info, create strbytes for Unicode init
      int pos = 0;
      int encodedlen = charbytes.length;
      byte[] lenbytes = ByteTools.shortToLEBytes((short) strlen);
      strbytes[pos++] = lenbytes[0]; // cch bytes 0 & 1
      strbytes[pos++] = lenbytes[1];
      if (!isuni) {
        strbytes[pos++] = (byte) 0x0; // grbit byte 2
      } else {
        strbytes[pos++] = (byte) 0x1; // grbit byte 2
        if (formattingRuns != null) {
          strbytes[pos - 1] |= 0x8; // set Rich Text attribute
          byte[] fr = ByteTools.shortToLEBytes((short) formattingRuns
              .size());
          strbytes[pos++] = fr[0]; // # formatting runs bytes 3 & 4
          strbytes[pos++] = fr[1];
        }
      }
      // character data follows the header bytes
      System.arraycopy(charbytes, 0, strbytes, pos, encodedlen);

      if (formattingRuns != null) {
        // formatting runs (charindex, fontindex)*n after string data
        byte[] frs = new byte[(formattingRuns.size() * 4)];
        for (int i = 0; i < formattingRuns.size(); i++) {
          short[] o = (short[]) formattingRuns.get(i);
          byte[] charIndex = ByteTools.shortToLEBytes(o[0]);
          byte[] fontIndex = ByteTools.shortToLEBytes(o[1]);
          System.arraycopy(charIndex, 0, frs, (i * 4), 2);
          System.arraycopy(fontIndex, 0, frs, (i * 4) + 2, 2);
        }
        // Append frs to end of strbytes
        byte[] newdata = new byte[strbytes.length + frs.length];
        System.arraycopy(strbytes, 0, newdata, 0, strbytes.length);
        System.arraycopy(frs, 0, newdata, strbytes.length, frs.length);
        strbytes = newdata;
      }
      // create a new one, set its data
      Unicodestring str = new Unicodestring();
      str.init(strbytes, false);
      return str;
    } catch (UnsupportedEncodingException e) {
      // thrown by the getBytes(DEFAULTENCODING) calls above
      Logger.logWarn("error encoding string: " + e.toString());
    }
    return null;
  }

  /**
   * insert a new Unicodestring into the array of strings composing this
   * String Table
   */
  int insertUnicodestring(String s) {
    int retpos = -1;
    // get the existing position of this string
    // but only if we're not ignoring dupes
    if (this.getWorkBook().isSharedupes()) {
      retpos = ((SstArrayList) stringvector).indexOf(s);
      if (retpos > -1) {
        Unicodestring str = (Unicodestring) stringvector.get(retpos);
        if (str.hasFormattingRuns())
          retpos = -1; // do not match if there are formatting runs
                  // embedded
      }
    }

    if (retpos == -1) { // it's a new string
      retpos = addUnicodestring(s, null); // add with no formatting
                        // information
    } else {
      cstTotal++;
      // this is a duplicate string, track it!
      dupeSstEntries.add(Integer.valueOf(retpos));
    }

    return retpos;
  }

  /**
   * Determine if the isst passed in is for a duplicate string or not.
   */
  boolean isSharedString(int sstLoc) {
    if (dupeSstEntries.contains(Integer.valueOf(sstLoc)))
      return true;
    return false;
  }

  /**
   * Return the Unicodestring at the corresponding index
   */
  Unicodestring getUStringAt(int i) {
    return (Unicodestring) stringvector.get(i);
  }

  /**
   * find this unicode string (including formatting) in stringarray
   *
   * @param us
   * @return
   */
  int find(Unicodestring us) {
    return ((SstArrayList) stringvector).find(us);
  }

  /**
   * Returns the String vector
   */
  public List getStringVector() {
    return this.stringvector;
  }

  /**
   * return the total number of strings in the SST
   *
   * @return
   */
  public int getNumTotal() {
    return cstTotal;
  }

  /**
   * return the number of unique strings in the SST
   *
   * @return
   */
  public int getNumUnique() {
    return cstUnique;
  }

  /**
   * return # continues
   *
   * @return
   */
  public int getNumContinues() {
    return numconts;
  }

  /**
   * we need to override stream to update changes to the byte array
   */
  public void preStream() {
    this.updateUnicodestrings();
  }

  // For debugging purposes
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append("cstTotal:" + cstTotal + " cstUnique:" + cstUnique
        + " numConts:" + numconts);
    for (int i = 0; i < stringvector.size(); i++) {
      sb.append("\n " + stringvector.get(i));
    }
    return sb.toString();
  }

  /**
   * Override ArrayList to allow matching based on .toString. Required because
   * we call ArrayList.indexOf(String) when ArrayList contains UnicodeStrings.
   */
  private class SstArrayList extends ArrayList {
    /**
     * serialVersionUID
     */
    private static final long serialVersionUID = 7904551471519095640L;
    private HashMap container = new HashMap();

    public boolean put(Object o, Integer isst) {
      container.put(((Unicodestring) o).toCachingString(), isst);
      return super.add(o);
    }

    public int indexOf(Object o) {
      Object oo = container.get(o.toString());
      if (oo == null)
        return -1;
      return ((Integer) oo).intValue();
    }

    public boolean remove(Object o) {
      Logger.logWarn("String being removed from SST array, Indexing may be off");
      container.remove(((Unicodestring) o).toCachingString());
      return super.remove(o);
    }

    /**
     * find this particular unicode string, including formatting
     *
     * @param us
     * @return
     */
    public int find(Unicodestring us) {
      return (super.indexOf(us));
    }
  }

  /**
   * generate the OOXML necessary to describe this string table, also fill
   * sststrings list with unique sststrings
   *
   * @param sststrings
   * @return sstooxml
   * @throws IOException
   */
  public void writeOOXML(Writer zip) throws IOException {
    StringBuffer sstooxml = new StringBuffer();

    zip.write(OOXMLConstants.xmlHeader);
    zip.write("\r\n");
    zip.write(("<sst xmlns=\"" + OOXMLConstants.xmlns + "\" count=\""
        + cstTotal + "\" uniqueCount=\"" + cstUnique + "\">"));
    zip.write("\r\n");
    for (int i = 0; i < this.getStringVector().size(); i++) {
      Unicodestring us = ((Unicodestring) this.getStringVector().get(i));
      ArrayList frs = us.getFormattingRuns();
      String s = us.getStringVal();
      s = OOXMLAdapter.stripNonAscii(s).toString();
      // sststrings.add(OOXMLAdapter.stripNonAscii(s));// zip.write(s); //
      // used as an index for cell values in parsing sheet ooxml

      // TODO: below should be in Unicodestring as .getOOXML?
      zip.write("<si>");
      zip.write("\r\n");

      if (frs == null) { // no intra-string formattingz
        if (s.indexOf(" ") == 0 || s.lastIndexOf(" ") == s.length() - 1) {
          zip.write(("<t xml:space=\"preserve\">" + s + "</t>"));
        } else {
          zip.write(("<t>" + s + "</t>"));
        }
        zip.write("\r\n");
      } else { // have formatting runs which split up string into areas
            // with separate formats applied
        int begIdx = 0;
        for (int j = 0; j < frs.size(); j++) {
          short[] idxs = (short[]) frs.get(j);
          if (idxs[0] > begIdx) { // +1!!
            if (j == 0) {
              zip.write("<r>"); // new rich text run
              zip.write(("<t xml:space=\"preserve\">"
                  + OOXMLAdapter.stripNonAscii(s.substring(
                      begIdx, idxs[0])) + "</t>"));
              zip.write("</r>");
              zip.write("\r\n");
            } else {
              zip.write(("<t xml:space=\"preserve\">"
                  + OOXMLAdapter.stripNonAscii(s.substring(
                      begIdx, idxs[0])) + "</t>"));
              zip.write("</r>");
              zip.write("\r\n");
            }
            begIdx = idxs[0];
          }
          zip.write("<r>"); // new rich text run
          Ss_rPr rp = Ss_rPr.createFromFont(this.getWorkBook()
              .getFont(idxs[1]));
          zip.write(rp.getOOXML());
        }
        if (begIdx < s.length()) // output remaining string
          s = s.substring(begIdx);
        else
          s = "";
        zip.write(("<t xml:space=\"preserve\">"
            + OOXMLAdapter.stripNonAscii(s) + "</t>"));
        zip.write("\r\n");
        zip.write("</r>");
      }
      zip.write("</si>");
      zip.write("\r\n");
    }
    zip.write("</sst>");
    // return sstooxml.toString();
  }

  /**
   * given SharedStrings.xml OOXML inputstream, read in string and formatting
   * data, if any and parse into ArrayList for later use in parseSheetOOXML
   *
   * @param bk
   *            WorkBookHandle
   * @param ii
   *            InputStream
   * @return String ArrayList return list of shared strings
   * @see parseSheetOOXML
   */
  public static ArrayList parseOOXML(WorkBookHandle bk, InputStream ii) {
    // NOTE:
    // apparently can have dup entries in sharedstring.xml
    // index of string links to cell value so must keep dups here
    // reset after parsing
    boolean shareDups = false;
    if (bk.getWorkBook().isSharedupes()) {
      bk.getWorkBook().setSharedupes(false);
      shareDups = true;
    }
    try {
      XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
      factory.setNamespaceAware(true);
      XmlPullParser xpp = factory.newPullParser();

      xpp.setInput(ii, "UTF-8"); // using XML 1.0 specification
      int eventType = xpp.getEventType();
      while (eventType != XmlPullParser.END_DOCUMENT) {
        if (eventType == XmlPullParser.START_TAG) {
          String tnm = xpp.getName();
          if (tnm.equals("si")) { // parse si single string table
                      // entry
            String s = "";
            ArrayList formattingRuns = null;
            while (eventType != XmlPullParser.END_DOCUMENT) {
              if (eventType == XmlPullParser.START_TAG) {
                if (xpp.getName().equals("rPr")) { // intra-string
                                  // formatting
                                  // properties
                  int idx = s.length(); // index into
                              // character string
                              // to apply
                              // formatting to
                  Ss_rPr rp = (Ss_rPr) Ss_rPr.parseOOXML(xpp, bk)
                      .cloneElement();
                  Font f = rp.generateFont(bk); // NOW CONVERT
                                  // ss_rPr to
                                  // a font!!
                  int fIndex = bk.getWorkBook().getFontIdx(f); // index
                                          // for
                                          // specific
                                          // font
                                          // formatting
                  if (fIndex == -1) // must insert new font
                    fIndex = bk.getWorkBook().insertFont(f) + 1;
                  if (formattingRuns == null)
                    formattingRuns = new ArrayList();
                  formattingRuns.add(new short[] {
                      Integer.valueOf(idx).shortValue(),
                      Integer.valueOf(fIndex).shortValue() });
                } else if (xpp.getName().equals("t")) {
                  /*
                   * boolean bPreserve= false; if
                   * (xpp.getAttributeCount()>0) { if
                   * (xpp.getAttributeName(0).equals("space")
                   * &&
                   * xpp.getAttributeValue(0).equals("preserve"
                   * )) bPreserve= true; }
                   */
                  eventType = xpp.next();
                  while (eventType != XmlPullParser.END_DOCUMENT
                      && eventType != XmlPullParser.END_TAG
                      && eventType != XmlPullParser.TEXT) {
                    eventType = xpp.next();
                  }
                  if (eventType == XmlPullParser.TEXT) {
                    s += xpp.getText();
                  }
                }
              } else if (eventType == XmlPullParser.END_TAG
                  && xpp.getName().equals("si")) {
                bk.getWorkBook().getSharedStringTable()
                    .addUnicodestring(s, formattingRuns); // create
                                        // a
                                        // new
                                        // unicode
                                        // string
                                        // with
                                        // formatting
                                        // runs
                break;
              }
              eventType = xpp.next();
            }
          }
        } else if (eventType == XmlPullParser.END_TAG) {
        }
        eventType = xpp.next();
      }
    } catch (Exception e) {
      Logger.logErr("SST.parseXML: " + e.toString());
    }
    if (shareDups)
      bk.getWorkBook().setSharedupes(true);

    return (ArrayList) bk.getWorkBook().getSharedStringTable()
        .getStringVector();
  }

  /**
   * Returns all strings that are in the SharedStringTable for this workbook.
   * The SST contains all standard string records in cells, but may not
   * include such things as strings that are contained within formulas. This
   * is useful for such things as full text indexing of workbooks
   *
   * @return Strings in the workbook.
   */
  public ArrayList getAllStrings() {
    ArrayList al = new ArrayList(stringvector.size());
    for (int i = 0; i < stringvector.size(); i++) {
      al.add(stringvector.get(i).toString());
    }
    return al;
  }

  /**
   * Returns the length of this record, including the 4 header bytes
   */
  public int getLength() {
    int len = super.getLength();
    // if "hasGrbit" must account for additional size taken up by it
    // see ContinueHandler.createSstContinues
    for (int i = 0; i < sstgrbits.size() - 1; i++) {
      Byte b = (Byte) sstgrbits.get(i);
      if (b != null) {
        byte grbyte = b.byteValue();
        if ((grbyte < 0x2) && (grbyte >= 0x0)) // Sst grbit is either 0h
                            // or 1h, otherwise it's
                            // String data
          len++;
      }
    }
    return len;
  }
}
TOP

Related Classes of com.extentech.formats.XLS.Sst$SstArrayList

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.