Package org.odftoolkit.odfdom.incubator.doc.text

Source Code of org.odftoolkit.odfdom.incubator.doc.text.OdfEditableTextExtractor

/************************************************************************
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
*
* Copyright 2009 IBM. All rights reserved.
*
* Use is subject to license terms.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0. You can also
* obtain a copy of the License at http://odftoolkit.org/docs/license.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and
* limitations under the License.
*
************************************************************************/
package org.odftoolkit.odfdom.incubator.doc.text;

import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.odftoolkit.odfdom.doc.OdfDocument;

import org.odftoolkit.odfdom.doc.table.OdfTable;
import org.odftoolkit.odfdom.doc.table.OdfTableRow;
import org.odftoolkit.odfdom.dom.OdfContentDom;
import org.odftoolkit.odfdom.dom.OdfMetaDom;
import org.odftoolkit.odfdom.dom.OdfStylesDom;
import org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement;
import org.odftoolkit.odfdom.dom.element.office.OfficeMetaElement;
import org.odftoolkit.odfdom.dom.element.style.StyleMasterPageElement;
import org.odftoolkit.odfdom.dom.element.table.TableTableElement;
import org.odftoolkit.odfdom.dom.element.text.TextAElement;
import org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement;
import org.odftoolkit.odfdom.pkg.OdfElement;
import org.w3c.dom.NodeList;

/**
* A subclass of OdfTextExtractor. It provides a method to return all the text
* that the user can typically edit in a document, including text in content.xml,
* headers and footers in styles.xml, and metadata in meta.xml.
*
* <p>This class can be used by search engines and text-analytics operations.</p>
*
*/
public class OdfEditableTextExtractor extends OdfTextExtractor {

  OdfDocument mDocument = null;
  OdfElement mElement = null;
  boolean mIsDocumentExtractor = false;

  /**
   * Constructor with an ODF document as a parameter
   * @param doc the ODF document whose editable text would be extracted.
   */
  private OdfEditableTextExtractor(OdfDocument doc) {
    mTextBuilder = new StringBuilder();
    mDocument = doc;
    mIsDocumentExtractor = true;
  }

  /**
   * Constructor with an ODF element as parameter
   * @param element the ODF element whose editable text would be extracted.
   */
  private OdfEditableTextExtractor(OdfElement element) {
    mTextBuilder = new StringBuilder();
    mElement = element;
    mIsDocumentExtractor = false;
  }

  /**
   * An instance of OdfEditableTextExtractor will be created to
   * extract the editable text content of an ODF element.
   * @param doc the ODF document whose text will be extracted.
   * @return An instance of OdfEditableTextExtractor
   */
  public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfDocument doc) {
    return new OdfEditableTextExtractor(doc);
  }

  /**
   * An instance of OdfEditableTextExtractor will be created to
   * extract the editable text content of an ODF element.
   * @param element the ODF element whose text will be extracted.
   * @return An instance of OdfEditableTextExtractor
   */
  public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfElement element) {
    return new OdfEditableTextExtractor(element);
  }

  /* (non-Javadoc)
   * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement)
   */
  @Override
  public void visit(DrawObjectElement element) {
    String embedDocPath = element.getXlinkHrefAttribute();
    OdfDocument embedDoc = ((OdfDocument) (((OdfContentDom) element.getOwnerDocument()).getDocument())).loadSubDocument(embedDocPath);
    if (embedDoc != null) {
      try {
        mTextBuilder.append(OdfEditableTextExtractor.newOdfEditableTextExtractor(embedDoc).getText());
      } catch (Exception e) {
        Logger.getLogger(OdfEditableTextExtractor.class.getName()).log(Level.SEVERE, null, e);
      }
    }
  }

  /* (non-Javadoc)
   * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement)
   */
  @Override
  public void visit(TextTrackedChangesElement ele) {
    return;
  }

  /* (non-Javadoc)
   * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextAElement)
   */
  @Override
  public void visit(TextAElement ele) {
    String link = ele.getXlinkHrefAttribute();
    mTextBuilder.append(link);
    appendElementText(ele);
  }

  /* (non-Javadoc)
   * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTabElement)
   */
  @Override
  public void visit(TableTableElement ele) {
    OdfTable table = OdfTable.getInstance(ele);
    List<OdfTableRow> rowlist = table.getRowList();
    for (int i = 0; i < rowlist.size(); i++) {
      OdfTableRow row = rowlist.get(i);
      for (int j = 0; j < row.getCellCount(); j++) {
        mTextBuilder.append(row.getCellByIndex(j).getDisplayText()).append(TabChar);
      }
      mTextBuilder.append(NewLineChar);
    }
  }

  /**
   * Return the editable text content as a string
   * @return the editable text content as a string
   */
  @Override
  public String getText() {
    if (mIsDocumentExtractor) {
      return getDocumentText();
    } else {
      visit(mElement);
      return mTextBuilder.toString();
    }
  }

  private String getDocumentText() {
    StringBuilder builder = new StringBuilder();
    try {
      //Extract text from content.xml
      OdfEditableTextExtractor contentDomExtractor = newOdfEditableTextExtractor(mDocument.getContentRoot());
      builder.append(contentDomExtractor.getText());

      //Extract text from style.xml
      OdfStylesDom styleDom = mDocument.getStylesDom();
      if (styleDom != null) {
        StyleMasterPageElement masterpage = null;
        NodeList list = styleDom.getElementsByTagName("style:master-page");
        if (list.getLength() > 0) {
          masterpage = (StyleMasterPageElement) list.item(0);
        }
        if (masterpage != null) {
          builder.append(newOdfEditableTextExtractor(masterpage).getText());
        }
      }

      //Extract text from meta.xml
      OdfMetaDom metaDom = mDocument.getMetaDom();
      if (metaDom != null) {
        OdfElement root = metaDom.getRootElement();
        OfficeMetaElement officemeta = OdfElement.findFirstChildNode(OfficeMetaElement.class, root);
        if (officemeta != null) {
          builder.append(newOdfEditableTextExtractor(officemeta).getText());
        }
      }

      return builder.toString();
    } catch (Exception e) {
      Logger.getLogger(OdfEditableTextExtractor.class.getName()).severe(e.getMessage());
      return builder.toString();
    }
  }
}
TOP

Related Classes of org.odftoolkit.odfdom.incubator.doc.text.OdfEditableTextExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.