/*
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software
* Foundation.
*
* You should have received a copy of the GNU Lesser General Public License along with this
* program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
* or from the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* Copyright (c) 2001 - 2013 Object Refinery Ltd, Pentaho Corporation and Contributors.. All rights reserved.
*/
package org.pentaho.reporting.engine.classic.core.modules.output.table.html;
import java.io.IOException;
import org.pentaho.reporting.engine.classic.core.AttributeNames;
import org.pentaho.reporting.engine.classic.core.InvalidReportStateException;
import org.pentaho.reporting.engine.classic.core.ReportAttributeMap;
import org.pentaho.reporting.engine.classic.core.layout.model.BlockRenderBox;
import org.pentaho.reporting.engine.classic.core.layout.model.CanvasRenderBox;
import org.pentaho.reporting.engine.classic.core.layout.model.InlineRenderBox;
import org.pentaho.reporting.engine.classic.core.layout.model.LayoutNodeTypes;
import org.pentaho.reporting.engine.classic.core.layout.model.ParagraphRenderBox;
import org.pentaho.reporting.engine.classic.core.layout.model.RenderBox;
import org.pentaho.reporting.engine.classic.core.layout.model.RenderNode;
import org.pentaho.reporting.engine.classic.core.layout.model.RenderableComplexText;
import org.pentaho.reporting.engine.classic.core.layout.model.RenderableReplacedContent;
import org.pentaho.reporting.engine.classic.core.layout.model.RenderableReplacedContentBox;
import org.pentaho.reporting.engine.classic.core.layout.model.RenderableText;
import org.pentaho.reporting.engine.classic.core.layout.model.SpacerRenderNode;
import org.pentaho.reporting.engine.classic.core.layout.model.context.BoxDefinition;
import org.pentaho.reporting.engine.classic.core.layout.output.OutputProcessorMetaData;
import org.pentaho.reporting.engine.classic.core.layout.process.text.RichTextSpec;
import org.pentaho.reporting.engine.classic.core.layout.text.GlyphList;
import org.pentaho.reporting.engine.classic.core.modules.output.table.base.DefaultTextExtractor;
import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.HtmlOutputProcessingException;
import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.HtmlTagHelper;
import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.HtmlTextExtractorHelper;
import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.HtmlTextExtractorState;
import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.StyleBuilder;
import org.pentaho.reporting.engine.classic.core.style.ElementStyleKeys;
import org.pentaho.reporting.engine.classic.core.style.StyleSheet;
import org.pentaho.reporting.engine.classic.core.util.InstanceID;
import org.pentaho.reporting.libraries.repository.ContentIOException;
import org.pentaho.reporting.libraries.xmlns.common.AttributeList;
import org.pentaho.reporting.libraries.xmlns.writer.CharacterEntityParser;
import org.pentaho.reporting.libraries.xmlns.writer.HtmlCharacterEntities;
import org.pentaho.reporting.libraries.xmlns.writer.XmlWriter;
import org.pentaho.reporting.libraries.xmlns.writer.XmlWriterSupport;
/**
* Creation-Date: 02.11.2007, 15:58:29
*
* @author Thomas Morgner
*/
public class HtmlTextExtractor extends DefaultTextExtractor
{
private static final String DIV_TAG = "div";
private static final String BR_TAG = "br";
private XmlWriter xmlWriter;
private StyleBuilder styleBuilder;
private CharacterEntityParser characterEntityParser;
private boolean result;
private HtmlTextExtractorState processStack;
private HtmlTextExtractorHelper textExtractorHelper;
public HtmlTextExtractor(final OutputProcessorMetaData metaData,
final XmlWriter xmlWriter,
final HtmlContentGenerator contentGenerator,
final HtmlTagHelper tagHelper)
{
super(metaData);
if (xmlWriter == null)
{
throw new NullPointerException();
}
if (contentGenerator == null)
{
throw new NullPointerException();
}
this.xmlWriter = xmlWriter;
this.styleBuilder = tagHelper.getStyleBuilder();
this.characterEntityParser = HtmlCharacterEntities.getEntityParser();
this.textExtractorHelper = new HtmlTextExtractorHelper(tagHelper, xmlWriter, metaData, contentGenerator);
}
public boolean performOutput(final RenderBox content, final StyleBuilder.StyleCarrier[] cellStyle) throws IOException
{
styleBuilder.clear();
clearText();
setRawResult(null);
result = false;
processStack = new HtmlTextExtractorState(null, false, cellStyle);
textExtractorHelper.setFirstElement(content.getInstanceId(), processStack);
try
{
final int nodeType = content.getNodeType();
if (nodeType == LayoutNodeTypes.TYPE_BOX_PARAGRAPH)
{
processInitialBox((ParagraphRenderBox) content);
}
else if (nodeType == LayoutNodeTypes.TYPE_BOX_CONTENT)
{
processRenderableContent((RenderableReplacedContentBox) content);
}
else
{
processBoxChilds(content);
}
}
finally
{
processStack = null;
}
return result;
}
/**
* Prints the contents of a canvas box. This can happen only once per cell, as every canvas box creates its
* own cell at some point. If for some strange reason a canvas box appears in the middle of a box-structure,
* your layouter is probably a mess and this method will treat the box as a generic content container.
*
* @param box the canvas box
* @return true, if the child content will be processed, false otherwise.
*/
protected boolean startCanvasBox(final CanvasRenderBox box)
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return false;
}
return textExtractorHelper.startBox
(box.getInstanceId(), box.getAttributes(), box.getStyleSheet(), box.getBoxDefinition(), false);
}
protected void finishCanvasBox(final CanvasRenderBox box)
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return;
}
textExtractorHelper.finishBox(box.getInstanceId(), box.getAttributes());
}
/**
* Prints a paragraph cell. This is a special entry point used by the processContent method and is never
* called from elsewhere. This method assumes that the attributes of the paragraph have been processed as
* part of the table-cell processing.
*
* @param box the paragraph box
* @throws IOException if an IO error occured.
*/
protected void processInitialBox(final ParagraphRenderBox box) throws IOException
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return;
}
final StyleSheet styleSheet = box.getStyleSheet();
final String target = (String) styleSheet.getStyleProperty(ElementStyleKeys.HREF_TARGET);
if (target != null)
{
textExtractorHelper.handleLinkOnElement(styleSheet, target);
processStack = new HtmlTextExtractorState(processStack, true);
}
else
{
processStack = new HtmlTextExtractorState(processStack, false);
}
if (Boolean.TRUE.equals
(box.getAttributes().getAttribute(AttributeNames.Html.NAMESPACE, AttributeNames.Html.SUPPRESS_CONTENT)) == false)
{
processParagraphChilds(box);
}
if (processStack.isWrittenTag())
{
xmlWriter.writeCloseTag();
}
processStack = processStack.getParent();
}
protected void addEmptyBreak()
{
try
{
xmlWriter.writeText(" ");
}
catch (final IOException e)
{
throw new HtmlOutputProcessingException("Failed to perform IO", e);
}
}
protected void addSoftBreak()
{
try
{
xmlWriter.writeText(" ");
}
catch (final IOException e)
{
throw new HtmlOutputProcessingException("Failed to perform IO", e);
}
}
protected void addLinebreak()
{
try
{
result = true;
xmlWriter.writeTag(HtmlPrinter.XHTML_NAMESPACE, BR_TAG, XmlWriterSupport.CLOSE);
}
catch (final IOException e)
{
throw new HtmlOutputProcessingException("Failed to perform IO", e);
}
}
protected boolean startBlockBox(final BlockRenderBox box)
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return false;
}
return textExtractorHelper.startBox
(box.getInstanceId(), box.getAttributes(), box.getStyleSheet(), box.getBoxDefinition(), true);
}
protected void finishBlockBox(final BlockRenderBox box)
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return;
}
textExtractorHelper.finishBox(box.getInstanceId(), box.getAttributes());
}
/**
* Like a canvas box, a row-box should be split into several cells already. Therefore we treat it as a generic
* content container instead.
*/
protected boolean startRowBox(final RenderBox box)
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return false;
}
return textExtractorHelper.startBox
(box.getInstanceId(), box.getAttributes(), box.getStyleSheet(), box.getBoxDefinition(), true);
}
protected void finishRowBox(final RenderBox box)
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return;
}
textExtractorHelper.finishBox(box.getInstanceId(), box.getAttributes());
}
protected boolean startInlineBox(final InlineRenderBox box)
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return false;
}
return textExtractorHelper.startInlineBox
(box.getInstanceId(), box.getAttributes(), box.getStyleSheet(), box.getBoxDefinition());
}
protected void finishInlineBox(final InlineRenderBox box)
{
if (box.getStaticBoxLayoutProperties().isVisible() == false)
{
return;
}
textExtractorHelper.finishBox(box.getInstanceId(), box.getAttributes());
}
protected void processOtherNode(final RenderNode node)
{
try
{
final int nodeType = node.getNodeType();
if (nodeType == LayoutNodeTypes.TYPE_NODE_TEXT ||
nodeType == LayoutNodeTypes.TYPE_NODE_COMPLEX_TEXT)
{
super.processOtherNode(node);
return;
}
if (node.isVirtualNode())
{
return;
}
if (nodeType == LayoutNodeTypes.TYPE_NODE_SPACER)
{
final SpacerRenderNode spacer = (SpacerRenderNode) node;
final int count = Math.max(1, spacer.getSpaceCount());
for (int i = 0; i < count; i++)
{
xmlWriter.writeText(" ");
}
}
}
catch (final IOException e)
{
throw new RuntimeException("Failed", e);
}
}
protected void processRenderableContent(final RenderableReplacedContentBox node)
{
try
{
final ReportAttributeMap map = node.getAttributes();
final AttributeList attrs = new AttributeList();
HtmlTagHelper.applyHtmlAttributes(map, attrs);
if (attrs.isEmpty() == false)
{
xmlWriter.writeTag(HtmlPrinter.XHTML_NAMESPACE, DIV_TAG, attrs, XmlWriterSupport.OPEN);
}
textExtractorHelper.writeLocalAnchor(node.getStyleSheet());
final StyleSheet styleSheet = node.getStyleSheet();
final String target = (String) styleSheet.getStyleProperty(ElementStyleKeys.HREF_TARGET);
if (target != null)
{
textExtractorHelper.handleLinkOnElement(styleSheet, target);
}
processReplacedContent(node);
if (target != null)
{
xmlWriter.writeCloseTag();
}
if (attrs.isEmpty() == false)
{
xmlWriter.writeCloseTag();
}
}
catch (final IOException e)
{
throw new RuntimeException("Failed", e);
}
catch (final ContentIOException e)
{
throw new RuntimeException("Failed", e);
}
}
/**
* @noinspection StringConcatenation
*/
private void processReplacedContent(final RenderableReplacedContentBox node) throws IOException, ContentIOException
{
final RenderableReplacedContent rc = node.getContent();
final ReportAttributeMap attrs = node.getAttributes();
final long width = node.getWidth();
final long height = node.getHeight();
final long contentWidth = rc.getContentWidth();
final long contentHeight = rc.getContentHeight();
final StyleSheet styleSheet = node.getStyleSheet();
final Object rawObject = rc.getRawObject();
// We have to do three things here. First, we have to check what kind
// of content we deal with.
if (textExtractorHelper.processRenderableReplacedContent(attrs, styleSheet, width, height, contentWidth, contentHeight, rawObject))
{
result = true;
}
}
protected void drawText(final RenderableText renderableText, final long contentX2)
{
try
{
if (renderableText.getLength() == 0)
{
// This text is empty.
return;
}
if (renderableText.isNodeVisible(getParagraphBounds(), isOverflowX(), isOverflowY()) == false)
{
return;
}
final String text;
final GlyphList gs = renderableText.getGlyphs();
final int maxLength = renderableText.computeMaximumTextSize(contentX2);
text = gs.getText(renderableText.getOffset(), maxLength, getCodePointBuffer());
if (text.length() > 0)
{
xmlWriter.writeText(characterEntityParser.encodeEntities(text));
if (text.trim().length() > 0)
{
result = true;
}
clearText();
}
}
catch (final IOException ioe)
{
throw new InvalidReportStateException("Failed to write text", ioe);
}
}
protected void drawComplexText(final RenderableComplexText renderableComplexText)
{
try
{
if (renderableComplexText.getRichText().isEmpty())
{
// This text is empty.
return;
}
if (renderableComplexText.isNodeVisible(getParagraphBounds(), isOverflowX(), isOverflowY()) == false)
{
return;
}
// iterate through all inline elements
for (final RichTextSpec.StyledChunk styledChunk : renderableComplexText.getRichText().getStyleChunks())
{
RenderNode node = styledChunk.getOriginatingTextNode();
InstanceID dummy = node.getInstanceId();
textExtractorHelper.startInlineBox(dummy,
styledChunk.getOriginalAttributes(), styledChunk.getStyleSheet(), BoxDefinition.EMPTY);
if (node instanceof RenderableReplacedContentBox)
{
processRenderableContent((RenderableReplacedContentBox) node);
result = true;
}
else
{
String text = styledChunk.getText();
xmlWriter.writeText(characterEntityParser.encodeEntities(text));
if (text.trim().length() > 0)
{
result = true;
}
}
textExtractorHelper.finishBox(dummy, styledChunk.getOriginalAttributes());
clearText();
}
}
catch (final IOException ioe)
{
throw new InvalidReportStateException("Failed to write text", ioe);
}
}
}