Package org.apache.poi.hpbf.extractor

Examples of org.apache.poi.hpbf.extractor.PublisherTextExtractor


            Entry entry = (Entry) entries.next();
            String name = entry.getName();
            if (entry instanceof DirectoryEntry) {
               if ("Quill".equals(name)) {
                  setType(metadata, "application/x-mspublisher");
                  PublisherTextExtractor extractor =
                      new PublisherTextExtractor(filesystem);
                  xhtml.element("p", extractor.getText());
               }
            } else if (entry instanceof DocumentEntry) {
               if ("WordDocument".equals(name)) {
                   setType(metadata, "application/msword");
                   WordExtractor extractor = new WordExtractor(filesystem);

                   addTextIfAny(xhtml, "header", extractor.getHeaderText());

                   for (String paragraph : extractor.getParagraphText()) {
                       xhtml.element("p", paragraph);
                   }

                   for (String paragraph : extractor.getFootnoteText()) {
                       xhtml.element("p", paragraph);
                   }

                   for (String paragraph : extractor.getCommentsText()) {
                       xhtml.element("p", paragraph);
                   }

                   for (String paragraph : extractor.getEndnoteText()) {
                       xhtml.element("p", paragraph);
                   }

                   addTextIfAny(xhtml, "footer", extractor.getFooterText());
               } else if ("PowerPoint Document".equals(name)) {
                   setType(metadata, "application/vnd.ms-powerpoint");
                   PowerPointExtractor extractor =
                       new PowerPointExtractor(filesystem);
                   xhtml.element("p", extractor.getText(true, true));
               } else if ("Workbook".equals(name)) {
                   setType(metadata, "application/vnd.ms-excel");
                   Locale locale = context.get(Locale.class, Locale.getDefault());
                   new ExcelExtractor().parse(filesystem, xhtml, locale);
               } else if ("VisioDocument".equals(name)) {
                   setType(metadata, "application/vnd.visio");
                   VisioTextExtractor extractor =
                       new VisioTextExtractor(filesystem);
                   for (String text : extractor.getAllText()) {
                       xhtml.element("p", text);
                   }
               } else if (!outlookExtracted && name.startsWith("__substg1.0_")) {
                   // TODO: Cleaner mechanism for detecting Outlook
                   outlookExtracted = true;
View Full Code Here


        case SOLIDWORKS_ASSEMBLY:
          break;
        case SOLIDWORKS_DRAWING:
          break;
        case PUBLISHER:
           PublisherTextExtractor publisherTextExtractor =
              new PublisherTextExtractor(root);
           xhtml.element("p", publisherTextExtractor.getText());
           break;
        case WORDDOCUMENT:
           new WordExtractor(context).parse(root, xhtml);
           break;
        case POWERPOINT:
View Full Code Here

                setType(metadata, type.getType());
            }

            switch (type) {
                case PUBLISHER:
                    PublisherTextExtractor publisherTextExtractor =
                        new PublisherTextExtractor(filesystem);
                    xhtml.element("p", publisherTextExtractor.getText());
                    break;
                case WORDDOCUMENT:
                    new WordExtractor(context).parse(filesystem, xhtml);
                    break;
                case POWERPOINT:
View Full Code Here

            setType(metadata, type.getType());
        }

        switch (type) {
        case PUBLISHER:
           PublisherTextExtractor publisherTextExtractor =
              new PublisherTextExtractor(root);
           xhtml.element("p", publisherTextExtractor.getText());
           break;
        case WORDDOCUMENT:
           new WordExtractor(context).parse(root, xhtml);
           break;
        case POWERPOINT:
View Full Code Here

        if (poifsDir.hasEntry("VisioDocument")) {
            return new VisioTextExtractor(poifsDir);
        }

        if (poifsDir.hasEntry("Quill")) {
            return new PublisherTextExtractor(poifsDir);
        }

        if (poifsDir.hasEntry("__substg1.0_1000001E") || poifsDir.hasEntry("__substg1.0_1000001F")
                || poifsDir.hasEntry("__substg1.0_0047001E")
                || poifsDir.hasEntry("__substg1.0_0047001F")
View Full Code Here

      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
         if(entry.getName().equals("Quill")) {
            return new PublisherTextExtractor(poifsDir, fs);
         }
      if(
            entry.getName().equals("__substg1.0_1000001E") ||
               entry.getName().equals("__substg1.0_1000001F") ||
            entry.getName().equals("__substg1.0_0047001E") ||
View Full Code Here

                setType(metadata, type.getType());
            }

            switch (type) {
                case PUBLISHER:
                    PublisherTextExtractor publisherTextExtractor =
                        new PublisherTextExtractor(filesystem);
                    xhtml.element("p", publisherTextExtractor.getText());
                    break;
                case WORDDOCUMENT:
                    new WordExtractor(context).parse(filesystem, xhtml);
                    break;
                case POWERPOINT:
View Full Code Here

        if (poifsDir.hasEntry("VisioDocument")) {
            return new VisioTextExtractor(poifsDir);
        }

        if (poifsDir.hasEntry("Quill")) {
            return new PublisherTextExtractor(poifsDir);
        }

        if (poifsDir.hasEntry("__substg1.0_1000001E") || poifsDir.hasEntry("__substg1.0_1000001F")
                || poifsDir.hasEntry("__substg1.0_0047001E")
                || poifsDir.hasEntry("__substg1.0_0047001F")
View Full Code Here

      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
         if(entry.getName().equals("Quill")) {
            return new PublisherTextExtractor(poifsDir, fs);
         }
      if(
                entry.getName().equals("__substg1.0_1000001E") ||
                entry.getName().equals("__substg1.0_1000001F") ||
                entry.getName().equals("__substg1.0_0047001E") ||
View Full Code Here

      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
         if(entry.getName().equals("Quill")) {
            return new PublisherTextExtractor(poifsDir, fs);
         }
      if(
                entry.getName().equals("__substg1.0_1000001E") ||
                entry.getName().equals("__substg1.0_1000001F") ||
                entry.getName().equals("__substg1.0_0047001E") ||
View Full Code Here

TOP

Related Classes of org.apache.poi.hpbf.extractor.PublisherTextExtractor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.