/**
* Copyright (c) 2003-2004, www.pdfbox.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of pdfbox; nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* http://www.pdfbox.org
*/
package test.pdfbox.util;
import java.io.File;
import java.io.FilenameFilter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
/**
* Test the performance of the PDF text stripper utility.
*
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.4 $
*/
public class TestTextStripperPerformance extends TestCase
{
    /**
     * Test class constructor.
     *
     * @param name The name of the test class.
     */
    public TestTextStripperPerformance( String name )
    {
        super( name );
    }

    /**
     * Test suite setup.  This test needs no per-test fixture, so the method
     * is intentionally empty.
     */
    public void setUp()
    {
    }

    /**
     * Run text extraction on a single file.
     *
     * The PDF is loaded, its text is stripped and the result is written to
     * <code>output/&lt;file name&gt;.txt</code>, resolved against the
     * grandparent directory of the input file.  No assertion is made on the
     * extracted text; the method only succeeds or throws.
     *
     * @param file The PDF file to extract text from.
     * @param bLogResult Currently unused by this method; kept so the signature
     *                   stays compatible with existing callers.
     * @throws Exception when loading, extracting, writing or closing fails.
     */
    public void doTestFile(File file, boolean bLogResult)
        throws Exception
    {
        PDFTextStripper stripper = new PDFTextStripper();
        OutputStream os = null;
        Writer writer = null;
        PDDocument document = null;
        try
        {
            document = PDDocument.load(file);
            File outFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt");
            os = new FileOutputStream(outFile);
            writer = new OutputStreamWriter(os);
            stripper.writeText(document, writer);
        }
        finally
        {
            // Each close is guarded by its own finally so that a failure while
            // closing one resource cannot prevent the remaining ones from
            // being released.  The close order matches the original code:
            // writer, then stream, then document.
            try
            {
                if( writer != null )
                {
                    writer.close();
                }
            }
            finally
            {
                try
                {
                    if( os != null )
                    {
                        os.close();
                    }
                }
                finally
                {
                    if( document != null )
                    {
                        document.close();
                    }
                }
            }
        }
    }

    /**
     * Run text extraction over the test file set.
     *
     * When the system property <code>test.pdfbox.util.TextStripper.file</code>
     * is unset or empty, every <code>*.pdf</code> file directly inside
     * <code>test/input</code> is passed to
     * {@link #doTestFile(File, boolean)}.  When the property is set, nothing
     * is executed.
     *
     * @throws Exception when the input directory cannot be listed or when
     *                   extraction of any file fails.
     */
    public void testExtract()
        throws Exception
    {
        String filename = System.getProperty("test.pdfbox.util.TextStripper.file");
        File testDir = new File("test/input");
        if ((filename == null) || (filename.length() == 0))
        {
            File[] testFiles = testDir.listFiles(new FilenameFilter()
            {
                public boolean accept(File dir, String name)
                {
                    return (name.endsWith(".pdf"));
                }
            });
            // listFiles() returns null when the path is not a directory or an
            // I/O error occurs; report that clearly instead of failing with a
            // NullPointerException in the loop below.
            if (testFiles == null)
            {
                throw new java.io.IOException(
                    "Unable to list test input directory: " + testDir.getAbsolutePath());
            }
            for (int n = 0; n < testFiles.length; n++)
            {
                doTestFile(testFiles[n], false);
            }
        }
        else
        {
            // NOTE(review): the single-file run is commented out, so setting
            // the property makes this test a no-op.  Presumably deliberate for
            // the performance test - confirm before re-enabling.
            //doTestFile(new File(testDir, filename), true);
        }
    }

    /**
     * Set the tests in the suite for this test class.
     *
     * @return the Suite.
     */
    public static Test suite()
    {
        return new TestSuite( TestTextStripperPerformance.class );
    }

    /**
     * Command line execution.  Hands this class's name to the JUnit text UI
     * runner; the supplied arguments are not forwarded.
     *
     * @param args Command line arguments (ignored).
     */
    public static void main( String[] args )
    {
        String[] arg = {TestTextStripperPerformance.class.getName() };
        junit.textui.TestRunner.main( arg );
    }
}