Package org.apache.poi.hwpf

Examples of org.apache.poi.hwpf.HWPFDocument


    /**
     * Bug 44431 - HWPFDocument.write destroys fields
     */
    public void test44431()
    {
        HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug44431.doc" );
        WordExtractor extractor1 = new WordExtractor( doc1 );

        HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
        WordExtractor extractor2 = new WordExtractor( doc2 );

        assertEquals( extractor1.getFooterText(), extractor2.getFooterText() );
        assertEquals( extractor1.getHeaderText(), extractor2.getHeaderText() );
        assertEquals( Arrays.toString( extractor1.getParagraphText() ),
View Full Code Here


    /**
     * Bug 44431 - HWPFDocument.write destroys fields
     */
    public void test44431_2()
    {
        HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug44431.doc" );
        WordExtractor extractor1 = new WordExtractor( doc1 );

        assertEquals( "File name=FieldsTest.doc\n" +
            "\n" +
            "\n" +
View Full Code Here

    /**
     * Bug 45473 - HWPF cannot read file after save
     */
    public void test45473()
    {
        HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug45473.doc" );
        String text1 = new WordExtractor( doc1 ).getText().trim();

        HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
        String text2 = new WordExtractor( doc2 ).getText().trim();

        // the text in the saved document has some differences in line
        // separators but we tolerate that
        assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) );
View Full Code Here

    /**
     * Bug 46220 - images are not properly extracted
     */
    public void test46220()
    {
        HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46220.doc" );
        // reference checksums as in Bugzilla
        String[] md5 = { "851be142bce6d01848e730cb6903f39e",
                "7fc6d8fb58b09ababd036d10a0e8c039",
                "a7dc644c40bc2fbf17b2b62d07f99248",
                "72d07b8db5fad7099d90bc4c304b4666" };
        List<Picture> pics = doc.getPicturesTable().getAllPictures();
        assertEquals( 4, pics.size() );
        for ( int i = 0; i < pics.size(); i++ )
        {
            Picture pic = pics.get( i );
            byte[] data = pic.getRawContent();
View Full Code Here

     * [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
     * missing
     */
    public void test46817()
    {
        HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46817.doc" );
        WordExtractor extractor = new WordExtractor( doc );
        String text = extractor.getText().trim();

        assertTrue( text.contains( "Nazwa wykonawcy" ) );
        assertTrue( text.contains( "kujawsko-pomorskie" ) );
View Full Code Here

     *
     * @throws IOException
     */
    public void test47286() throws IOException
    {
        HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug47286.doc" );
        String text1 = new WordExtractor( doc1 ).getText().trim();

        HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
        String text2 = new WordExtractor( doc2 ).getText().trim();

        // the text in the saved document has some differences in line
        // separators but we tolerate that
        assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) );

        assertEquals( doc1.getCharacterTable().getTextRuns().size(), doc2
                .getCharacterTable().getTextRuns().size() );

        List<PlexOfField> expectedFields = doc1.getFieldsTables()
                .getFieldsPLCF( FieldsDocumentPart.MAIN );
        List<PlexOfField> actualFields = doc2.getFieldsTables().getFieldsPLCF(
                FieldsDocumentPart.MAIN );
        assertEquals( expectedFields.size(), actualFields.size() );

        assertTableStructures( doc1.getRange(), doc2.getRange() );
    }
View Full Code Here

     * [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
     * CharacterRun.replaceText()
     */
    public void test47287()
    {
        HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" );
        String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
                "1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
        int usedVal = 0;
        String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
        Range r = doc.getRange();
        for ( int x = 0; x < r.numSections(); x++ )
        {
            Section s = r.getSection( x );
            for ( int y = 0; y < s.numParagraphs(); y++ )
            {
View Full Code Here

     * [RESOLVED FIXED] Bug 47731 - Word Extractor considers text copied from
     * some website as an embedded object
     */
    public void test47731() throws Exception
    {
        HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47731.doc" );
        String foundText = new WordExtractor( doc ).getText();

        assertTrue( foundText
                .contains( "Soak the rice in water for three to four hours" ) );
    }
View Full Code Here

     */
    public void test47742() throws Exception
    {

        // (1) extract text from MS Word document via POI
        HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47742.doc" );
        String foundText = new WordExtractor( doc ).getText();

        // (2) read text from text document (retrieved by saving the word
        // document as text file using encoding UTF-8)
        InputStream is = POIDataSamples.getDocumentInstance()
View Full Code Here

    /**
     * Bug 47958 - Exception during Escher walk of pictures
     */
    public void test47958()
    {
        HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47958.doc" );
        doc.getPicturesTable().getAllPictures();
    }
View Full Code Here

TOP

Related Classes of org.apache.poi.hwpf.HWPFDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.