Examples of HWPFDocument


Examples of org.apache.poi.hwpf.HWPFDocument

  /**
   * Test (more "confirm" than test) that we have the general structure that we expect to have.
   */
  public void testDocStructure() throws Exception {

    HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
    Range range;
    Section section;
    Paragraph para;
    PAPX paraDef;

    // First, check overall
    range = daDoc.getOverallRange();
    assertEquals(1, range.numSections());
    assertEquals(5, range.numParagraphs());


    // Now, onto just the doc bit
    range = daDoc.getRange();

    assertEquals(1, range.numSections());
    assertEquals(1, daDoc.getSectionTable().getSections().size());
    section = range.getSection(0);

    assertEquals(5, section.numParagraphs());

    para = section.getParagraph(0);
    assertEquals(1, para.numCharacterRuns());
    assertEquals(introText, para.text());

    para = section.getParagraph(1);
    assertEquals(5, para.numCharacterRuns());
    assertEquals(fillerText, para.text());


    paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(2);
    assertEquals(132, paraDef.getStart());
    assertEquals(400, paraDef.getEnd());

    para = section.getParagraph(2);
    assertEquals(5, para.numCharacterRuns());
    assertEquals(originalText, para.text());


    paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(3);
    assertEquals(400, paraDef.getStart());
    assertEquals(438, paraDef.getEnd());

    para = section.getParagraph(3);
    assertEquals(1, para.numCharacterRuns());
View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

  /**
   * Test that we can delete text (one instance) from our Range with Unicode text.
   */
  public void testRangeDeleteOne() throws Exception {

    HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));

    Range range = daDoc.getOverallRange();
    assertEquals(1, range.numSections());

    Section section = range.getSection(0);
    assertEquals(5, section.numParagraphs());

    Paragraph para = section.getParagraph(2);

    String text = para.text();
    assertEquals(originalText, text);

    int offset = text.indexOf(searchText);
    assertEquals(192, offset);

    int absOffset = para.getStartOffset() + offset;
    Range subRange = new Range(absOffset, (absOffset + searchText.length()), para.getDocument());

    assertEquals(searchText, subRange.text());

    subRange.delete();

    // we need to let the model re-calculate the Range before we evaluate it
    range = daDoc.getRange();

    assertEquals(1, range.numSections());
    section = range.getSection(0);

    assertEquals(5, section.numParagraphs());
    para = section.getParagraph(2);

    text = para.text();
    assertEquals(expectedText2, text);

    // this can lead to a StringBufferOutOfBoundsException, so we will add it
    // even though we don't have an assertion for it
    Range daRange = daDoc.getRange();
    daRange.text();
  }
View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

  /**
   * Test that we can delete text (all instances of) from our Range with Unicode text.
   */
  public void testRangeDeleteAll() throws Exception {

    HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));

    Range range = daDoc.getRange();
    assertEquals(1, range.numSections());

    Section section = range.getSection(0);
    assertEquals(5, section.numParagraphs());

    Paragraph para = section.getParagraph(2);

    String text = para.text();
    assertEquals(originalText, text);

    boolean keepLooking = true;
    while (keepLooking) {
      // Reload the range every time
      range = daDoc.getRange();
      int offset = range.text().indexOf(searchText);
      if (offset >= 0) {

        int absOffset = range.getStartOffset() + offset;

        Range subRange = new Range(
          absOffset, (absOffset + searchText.length()), range.getDocument());

        assertEquals(searchText, subRange.text());

        subRange.delete();

      } else {
        keepLooking = false;
      }
    }

    // we need to let the model re-calculate the Range before we use it
    range = daDoc.getRange();

    assertEquals(1, range.numSections());
    section = range.getSection(0);

    assertEquals(5, section.numParagraphs());
View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

  /**
   * Create a new Word Extractor
   *
   * <p>Builds an {@code HWPFDocument} from the given file system, hands it to
   * another constructor of this class, and then stores the file system in the
   * {@code fs} field.
   *
   * @param fs POIFSFileSystem containing the word file
   * @throws IOException declared by this constructor; presumably raised when
   *         the document cannot be read from {@code fs} (confirm against
   *         HWPFDocument)
   */
  public WordExtractor(POIFSFileSystem fs) throws IOException {
    // Delegate to the constructor that accepts the parsed document
    this(new HWPFDocument(fs));
    // Keep a reference to the originating file system
    this.fs = fs;
  }
View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

  /**
   * Creates an extractor from a file system.
   *
   * <p>Builds an {@code HWPFDocument} from {@code fs}, passes it to another
   * constructor of this class, and then stores {@code fs} in the {@code fs}
   * field.
   *
   * @param fs file system handed to the {@code HWPFDocument} constructor
   * @throws IOException declared by this constructor; presumably raised when
   *         the document cannot be read (confirm against HWPFDocument)
   */
  public WordExtractor(POIFSFileSystem fs) throws IOException {
    // Delegate to the constructor that accepts the parsed document
    this(new HWPFDocument(fs));
    // Keep a reference to the originating file system
    this.fs = fs;
  }
  /**
   * Creates an extractor from a directory node and its file system.
   *
   * <p>Builds an {@code HWPFDocument} from {@code dir} and {@code fs}, passes
   * it to another constructor of this class, and then stores {@code fs} in the
   * {@code fs} field.
   *
   * @param dir directory node handed to the {@code HWPFDocument} constructor
   * @param fs file system handed to the {@code HWPFDocument} constructor
   * @throws IOException declared by this constructor; presumably raised when
   *         the document cannot be read (confirm against HWPFDocument)
   */
  public WordExtractor(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
    // Delegate to the constructor that accepts the parsed document
    this(new HWPFDocument(dir, fs));
    // Keep a reference to the originating file system
    this.fs = fs;
  }
View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

      System.err.println("   HWPFLister <filename>");
      System.exit(1);
    }

    HWPFLister l = new HWPFLister(
        new HWPFDocument(new FileInputStream(args[0]))
    );
    l.dumpFIB();
  }
View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

             InputStream data = ole.getObjectData().getData();
             if ("Worksheet".equals(name)) {
                 HSSFWorkbook wb = new HSSFWorkbook(data);
                 num_xls++;
             } else if ("Document".equals(name)) {
                 HWPFDocument doc = new HWPFDocument(data);
                 num_doc++;
             } else if ("Presentation".equals(name)) {
                 num_ppt++;
                 SlideShow ppt = new SlideShow(data);
             }
View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

         * reconstruct the structure of range -- sergey
         */
        List<Structure> structures = new LinkedList<Structure>();
        if ( wordDocument instanceof HWPFDocument )
        {
            final HWPFDocument doc = (HWPFDocument) wordDocument;

            Map<Integer, List<Bookmark>> rangeBookmarks = doc.getBookmarks()
                    .getBookmarksStartedBetween( range.getStartOffset(),
                            range.getEndOffset() );

            if ( rangeBookmarks != null )
            {
                for ( List<Bookmark> lists : rangeBookmarks.values() )
                {
                    for ( Bookmark bookmark : lists )
                    {
                        if ( !bookmarkStack.contains( bookmark ) )
                            addToStructures( structures, new Structure(
                                    bookmark ) );
                    }
                }
            }

            // TODO: dead fields?
            int skipUntil = -1;
            for ( int c = 0; c < range.numCharacterRuns(); c++ )
            {
                CharacterRun characterRun = range.getCharacterRun( c );
                if ( characterRun == null )
                    throw new AssertionError();
                if ( characterRun.getStartOffset() < skipUntil )
                    continue;
                String text = characterRun.text();
                if ( text == null || text.length() == 0
                        || text.charAt( 0 ) != FIELD_BEGIN_MARK )
                    continue;

                Field aliveField = ( (HWPFDocument) wordDocument ).getFields()
                        .getFieldByStartOffset( FieldsDocumentPart.MAIN,
                                characterRun.getStartOffset() );
                if ( aliveField != null )
                {
                    addToStructures( structures, new Structure( aliveField ) );
                }
                else
                {
                    int[] separatorEnd = tryDeadField_lookupFieldSeparatorEnd(
                            wordDocument, range, c );
                    if ( separatorEnd != null )
                    {
                        addToStructures(
                                structures,
                                new Structure( new DeadFieldBoundaries( c,
                                        separatorEnd[0], separatorEnd[1] ),
                                        characterRun.getStartOffset(), range
                                                .getCharacterRun(
                                                        separatorEnd[1] )
                                                .getEndOffset() ) );
                        c = separatorEnd[1];
                    }
                }
            }
        }

        structures = new ArrayList<Structure>( structures );
        Collections.sort( structures );

        int previous = range.getStartOffset();
        for ( Structure structure : structures )
        {
            if ( structure.start != previous )
            {
                Range subrange = new Range( previous, structure.start, range )
                {
                    @Override
                    public String toString()
                    {
                        return "BetweenStructuresSubrange " + super.toString();
                    }
                };
                processCharacters( wordDocument, currentTableLevel, subrange,
                        block );
            }

            if ( structure.structure instanceof Bookmark )
            {
                // other bookmarks with same boundaries
                List<Bookmark> bookmarks = new LinkedList<Bookmark>();
                for ( Bookmark bookmark : ( (HWPFDocument) wordDocument )
                        .getBookmarks()
                        .getBookmarksStartedBetween( structure.start,
                                structure.start + 1 ).values().iterator()
                        .next() )
                {
                    if ( bookmark.getStart() == structure.start
                            && bookmark.getEnd() == structure.end )
                    {
                        bookmarks.add( bookmark );
                    }
                }

                bookmarkStack.addAll( bookmarks );
                try
                {
                    int end = Math.min( range.getEndOffset(), structure.end );
                    Range subrange = new Range( structure.start, end, range )
                    {
                        @Override
                        public String toString()
                        {
                            return "BookmarksSubrange " + super.toString();
                        }
                    };

                    processBookmarks( wordDocument, block, subrange,
                            currentTableLevel, bookmarks );
                }
                finally
                {
                    bookmarkStack.removeAll( bookmarks );
                }
            }
            else if ( structure.structure instanceof Field )
            {
                Field field = (Field) structure.structure;
                processField( (HWPFDocument) wordDocument, range,
                        currentTableLevel, field, block );
            }
            else if ( structure.structure instanceof DeadFieldBoundaries )
            {
                DeadFieldBoundaries boundaries = (DeadFieldBoundaries) structure.structure;
                processDeadField( wordDocument, block, range,
                        currentTableLevel, boundaries.beginMark,
                        boundaries.separatorMark, boundaries.endMark );
            }
            else
            {
                throw new UnsupportedOperationException( "NYI: "
                        + structure.structure.getClass() );
            }

            previous = Math.min( range.getEndOffset(), structure.end );
        }

        if ( previous != range.getStartOffset() )
        {
            if ( previous > range.getEndOffset() )
            {
                logger.log( POILogger.WARN, "Latest structure in ", range,
                        " ended at #" + previous, " after range boundaries [",
                        range.getStartOffset() + "; " + range.getEndOffset(),
                        ")" );
                return true;
            }

            if ( previous < range.getEndOffset() )
            {
                Range subrange = new Range( previous, range.getEndOffset(),
                        range )
                {
                    @Override
                    public String toString()
                    {
                        return "AfterStructureSubrange " + super.toString();
                    }
                };
                processCharacters( wordDocument, currentTableLevel, subrange,
                        block );
            }
            return true;
        }

        for ( int c = 0; c < range.numCharacterRuns(); c++ )
        {
            CharacterRun characterRun = range.getCharacterRun( c );

            if ( characterRun == null )
                throw new AssertionError();

            if ( wordDocument instanceof HWPFDocument
                    && ( (HWPFDocument) wordDocument ).getPicturesTable()
                            .hasPicture( characterRun ) )
            {
                HWPFDocument newFormat = (HWPFDocument) wordDocument;
                Picture picture = newFormat.getPicturesTable().extractPicture(
                        characterRun, true );

                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
                        picture );
                continue;
            }

            String text = characterRun.text();
            if ( text.getBytes().length == 0 )
                continue;

            if ( characterRun.isSpecialCharacter() )
            {
                if ( text.charAt( 0 ) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
                        && ( wordDocument instanceof HWPFDocument ) )
                {
                    HWPFDocument doc = (HWPFDocument) wordDocument;
                    processNoteAnchor( doc, characterRun, block );
                    continue;
                }
                if ( text.charAt( 0 ) == SPECCHAR_DRAWN_OBJECT
                        && ( wordDocument instanceof HWPFDocument ) )
                {
                    HWPFDocument doc = (HWPFDocument) wordDocument;
                    processDrawnObject( doc, characterRun, block );
                    continue;
                }
                if ( characterRun.isOle2()
                        && ( wordDocument instanceof HWPFDocument ) )
                {
                    HWPFDocument doc = (HWPFDocument) wordDocument;
                    processOle2( doc, characterRun, block );
                    continue;
                }
            }

View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

    {
        final POIFSFileSystem poifsFileSystem = HWPFDocumentCore
                .verifyAndBuildPOIFS( inputStream );
        try
        {
            return new HWPFDocument( poifsFileSystem );
        }
        catch ( OldWordFileFormatException exc )
        {
            return new HWPFOldDocument( poifsFileSystem );
        }
View Full Code Here

Examples of org.apache.poi.hwpf.HWPFDocument

        {
            System.out.println( "Word 95 not supported so far" );
            return;
        }

        HWPFDocument document = (HWPFDocument) _doc;
        Bookmarks bookmarks = document.getBookmarks();
        for ( int b = 0; b < bookmarks.getBookmarksCount(); b++ )
        {
            Bookmark bookmark = bookmarks.getBookmark( b );
            System.out.println( "[" + bookmark.getStart() + "; "
                    + bookmark.getEnd() + "): " + bookmark.getName() );
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.