Package org.apache.poi.hwpf

Examples of org.apache.poi.hwpf.HWPFDocument


  }

  private void test(int rows, int columns) throws Exception {
    // POI apparently can't create a document from scratch,
    // so we need an existing empty dummy document
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile("empty.doc");

    Range range = doc.getRange();
    range.sanityCheck();

    Table table = range.insertTableBefore((short) columns, rows);
    table.sanityCheck();
View Full Code Here


         * reconstruct the structure of range -- sergey
         */
        List<Structure> structures = new LinkedList<Structure>();
        if ( wordDocument instanceof HWPFDocument )
        {
            final HWPFDocument doc = (HWPFDocument) wordDocument;

            Map<Integer, List<Bookmark>> rangeBookmarks = doc.getBookmarks()
                    .getBookmarksStartedBetween( range.getStartOffset(),
                            range.getEndOffset() );

            if ( rangeBookmarks != null )
            {
                for ( List<Bookmark> lists : rangeBookmarks.values() )
                {
                    for ( Bookmark bookmark : lists )
                    {
                        if ( !bookmarkStack.contains( bookmark ) )
                            addToStructures( structures, new Structure(
                                    bookmark ) );
                    }
                }
            }

            // TODO: dead fields?
            int skipUntil = -1;
            for ( int c = 0; c < range.numCharacterRuns(); c++ )
            {
                CharacterRun characterRun = range.getCharacterRun( c );
                if ( characterRun == null )
                    throw new AssertionError();
                if ( characterRun.getStartOffset() < skipUntil )
                    continue;
                String text = characterRun.text();
                if ( text == null || text.length() == 0
                        || text.charAt( 0 ) != FIELD_BEGIN_MARK )
                    continue;

                Field aliveField = ( (HWPFDocument) wordDocument ).getFields()
                        .getFieldByStartOffset( FieldsDocumentPart.MAIN,
                                characterRun.getStartOffset() );
                if ( aliveField != null )
                {
                    addToStructures( structures, new Structure( aliveField ) );
                }
                else
                {
                    int[] separatorEnd = tryDeadField_lookupFieldSeparatorEnd(
                            wordDocument, range, c );
                    if ( separatorEnd != null )
                    {
                        addToStructures(
                                structures,
                                new Structure( new DeadFieldBoundaries( c,
                                        separatorEnd[0], separatorEnd[1] ),
                                        characterRun.getStartOffset(), range
                                                .getCharacterRun(
                                                        separatorEnd[1] )
                                                .getEndOffset() ) );
                        c = separatorEnd[1];
                    }
                }
            }
        }

        structures = new ArrayList<Structure>( structures );
        Collections.sort( structures );

        int previous = range.getStartOffset();
        for ( Structure structure : structures )
        {
            if ( structure.start != previous )
            {
                Range subrange = new Range( previous, structure.start, range )
                {
                    @Override
                    public String toString()
                    {
                        return "BetweenStructuresSubrange " + super.toString();
                    }
                };
                processCharacters( wordDocument, currentTableLevel, subrange,
                        block );
            }

            if ( structure.structure instanceof Bookmark )
            {
                // other bookmarks with same boundaries
                List<Bookmark> bookmarks = new LinkedList<Bookmark>();
                for ( Bookmark bookmark : ( (HWPFDocument) wordDocument )
                        .getBookmarks()
                        .getBookmarksStartedBetween( structure.start,
                                structure.start + 1 ).values().iterator()
                        .next() )
                {
                    if ( bookmark.getStart() == structure.start
                            && bookmark.getEnd() == structure.end )
                    {
                        bookmarks.add( bookmark );
                    }
                }

                bookmarkStack.addAll( bookmarks );
                try
                {
                    int end = Math.min( range.getEndOffset(), structure.end );
                    Range subrange = new Range( structure.start, end, range )
                    {
                        @Override
                        public String toString()
                        {
                            return "BookmarksSubrange " + super.toString();
                        }
                    };

                    processBookmarks( wordDocument, block, subrange,
                            currentTableLevel, bookmarks );
                }
                finally
                {
                    bookmarkStack.removeAll( bookmarks );
                }
            }
            else if ( structure.structure instanceof Field )
            {
                Field field = (Field) structure.structure;
                processField( (HWPFDocument) wordDocument, range,
                        currentTableLevel, field, block );
            }
            else if ( structure.structure instanceof DeadFieldBoundaries )
            {
                DeadFieldBoundaries boundaries = (DeadFieldBoundaries) structure.structure;
                processDeadField( wordDocument, block, range,
                        currentTableLevel, boundaries.beginMark,
                        boundaries.separatorMark, boundaries.endMark );
            }
            else
            {
                throw new UnsupportedOperationException( "NYI: "
                        + structure.structure.getClass() );
            }

            previous = Math.min( range.getEndOffset(), structure.end );
        }

        if ( previous != range.getStartOffset() )
        {
            if ( previous > range.getEndOffset() )
            {
                logger.log( POILogger.WARN, "Latest structure in ", range,
                        " ended at #" + previous, " after range boundaries [",
                        range.getStartOffset() + "; " + range.getEndOffset(),
                        ")" );
                return true;
            }

            if ( previous < range.getEndOffset() )
            {
                Range subrange = new Range( previous, range.getEndOffset(),
                        range )
                {
                    @Override
                    public String toString()
                    {
                        return "AfterStructureSubrange " + super.toString();
                    }
                };
                processCharacters( wordDocument, currentTableLevel, subrange,
                        block );
            }
            return true;
        }

        for ( int c = 0; c < range.numCharacterRuns(); c++ )
        {
            CharacterRun characterRun = range.getCharacterRun( c );

            if ( characterRun == null )
                throw new AssertionError();

            if ( wordDocument instanceof HWPFDocument
                    && ( (HWPFDocument) wordDocument ).getPicturesTable()
                            .hasPicture( characterRun ) )
            {
                HWPFDocument newFormat = (HWPFDocument) wordDocument;
                Picture picture = newFormat.getPicturesTable().extractPicture(
                        characterRun, true );

                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
                        picture );
                continue;
            }

            String text = characterRun.text();
            if ( text.getBytes().length == 0 )
                continue;

            if ( characterRun.isSpecialCharacter() )
            {
                if ( text.charAt( 0 ) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
                        && ( wordDocument instanceof HWPFDocument ) )
                {
                    HWPFDocument doc = (HWPFDocument) wordDocument;
                    processNoteAnchor( doc, characterRun, block );
                    continue;
                }
                if ( text.charAt( 0 ) == SPECCHAR_DRAWN_OBJECT
                        && ( wordDocument instanceof HWPFDocument ) )
                {
                    HWPFDocument doc = (HWPFDocument) wordDocument;
                    processDrawnObject( doc, characterRun, block );
                    continue;
                }
                if ( characterRun.isOle2()
                        && ( wordDocument instanceof HWPFDocument ) )
                {
                    HWPFDocument doc = (HWPFDocument) wordDocument;
                    processOle2( doc, characterRun, block );
                    continue;
                }
                if ( characterRun.isSymbol()
                        && ( wordDocument instanceof HWPFDocument ) )
                {
                    HWPFDocument doc = (HWPFDocument) wordDocument;
                    processSymbol( doc, characterRun, block );
                    continue;
                }
            }

View Full Code Here

                    assertEquals(2, sheet.getRow(2).getCell(0).getNumericCellValue(), 0);
                    assertEquals(3, sheet.getRow(3).getCell(0).getNumericCellValue(), 0);
                    assertEquals(8, sheet.getRow(5).getCell(0).getNumericCellValue(), 0);
                } else if ("Document".equals(ole.getInstanceName())){
                    //creating a HWPF document
                    HWPFDocument doc = new HWPFDocument(data.getData());
                    String txt = doc.getRange().getParagraph(0).text();
                    assertEquals("OLE embedding is thoroughly unremarkable.\r", txt);
                }
            }

        }
View Full Code Here

    }

    private static String getHtmlText( final String sampleFileName,
            boolean emulatePictureStorage ) throws Exception
    {
        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
                .getDocumentInstance().openResourceAsStream( sampleFileName ) );

        Document newDocument = XMLHelper.getDocumentBuilderFactory().newDocumentBuilder().newDocument();
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                newDocument );
View Full Code Here

             InputStream data = ole.getObjectData().getData();
             if ("Worksheet".equals(name)) {
                 HSSFWorkbook wb = new HSSFWorkbook(data);
                 num_xls++;
             } else if ("Document".equals(name)) {
                 HWPFDocument doc = new HWPFDocument(data);
                 num_doc++;
             } else if ("Presentation".equals(name)) {
                 num_ppt++;
                 SlideShow ppt = new SlideShow(data);
             }
View Full Code Here

   *  embedded word documents
   * @throws Exception
   */
  public void testExtractFromEmbeded() throws Exception {
    POIFSFileSystem fs = new POIFSFileSystem(POIDataSamples.getSpreadSheetInstance().openResourceAsStream(filename3));
    HWPFDocument doc;
    WordExtractor extractor3;

    DirectoryNode dirA = (DirectoryNode) fs.getRoot().getEntry("MBD0000A3B7");
    DirectoryNode dirB = (DirectoryNode) fs.getRoot().getEntry("MBD0000A3B2");

    // Should have WordDocument and 1Table
    assertNotNull(dirA.getEntry("1Table"));
    assertNotNull(dirA.getEntry("WordDocument"));

    assertNotNull(dirB.getEntry("1Table"));
    assertNotNull(dirB.getEntry("WordDocument"));

    // Check each in turn
    doc = new HWPFDocument(dirA, fs);
    extractor3 = new WordExtractor(doc);

    assertNotNull(extractor3.getText());
    assertTrue(extractor3.getText().length() > 20);
    assertEquals("I am a sample document\r\nNot much on me\r\nI am document 1\r\n", extractor3
        .getText());
    assertEquals("Sample Doc 1", extractor3.getSummaryInformation().getTitle());
    assertEquals("Sample Test", extractor3.getSummaryInformation().getSubject());

    doc = new HWPFDocument(dirB, fs);
    extractor3 = new WordExtractor(doc);

    assertNotNull(extractor3.getText());
    assertTrue(extractor3.getText().length() > 20);
    assertEquals("I am another sample document\r\nNot much on me\r\nI am document 2\r\n",
View Full Code Here

    assertEquals("Another Sample Test", extractor3.getSummaryInformation().getSubject());
  }

  public void testWithHeader() {
    // Non-unicode
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile(filename4);
    extractor = new WordExtractor(doc);

    assertEquals("First header column!\tMid header Right header!\n", extractor.getHeaderText());

    String text = extractor.getText();
View Full Code Here

    assertTrue(text.indexOf("This is a simple header") > -1);
  }

  public void testWithFooter() {
    // Non-unicode
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile(filename4);
    extractor = new WordExtractor(doc);

    assertEquals("Footer Left\tFooter Middle Footer Right\n", extractor.getFooterText());

    String text = extractor.getText();
View Full Code Here

    text = extractor.getText();
    assertTrue(text.indexOf("The footer, with") > -1);
  }

  public void testFootnote() {
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile(filename6);
    extractor = new WordExtractor(doc);

    String[] text = extractor.getFootnoteText();
    StringBuffer b = new StringBuffer();
    for (int i = 0; i < text.length; i++) {
View Full Code Here

    assertTrue(b.toString().contains("TestFootnote"));
  }

  public void testEndnote() {
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile(filename6);
    extractor = new WordExtractor(doc);

    String[] text = extractor.getEndnoteText();
    StringBuffer b = new StringBuffer();
    for (int i = 0; i < text.length; i++) {
View Full Code Here

TOP

Related Classes of org.apache.poi.hwpf.HWPFDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact: coftware#gmail.com.