Package org.apache.poi.poifs.filesystem

Examples of org.apache.poi.poifs.filesystem.Entry


 
  public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
    // Look for certain entries in the stream, to figure it
    //  out from
    for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
      Entry entry = (Entry)entries.next();
     
      if(entry.getName().equals("Workbook")) {
        return new ExcelExtractor(fs);
      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(fs);
      }
    }
    throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
  }
View Full Code Here


      EmbeddedObjectRefSubRecord subRecord = findObjectRecord();

      int streamId = ((EmbeddedObjectRefSubRecord) subRecord).getStreamId();
        String streamName = "MBD" + HexDump.toHex(streamId);

        Entry entry = poifs.getRoot().getEntry(streamName);
        if (entry instanceof DirectoryEntry) {
            return (DirectoryEntry) entry;
        } else {
            throw new IOException("Stream " + streamName + " was not an OLE2 directory");
        }
View Full Code Here

    protected void handleEmbededOfficeDoc(
            DirectoryEntry dir, XHTMLContentHandler xhtml)
            throws IOException, SAXException, TikaException {
        // Is it an embedded OLE2 document, or an embedded OOXML document?
        try {
            Entry ooxml = dir.getEntry("Package");

            // It's OOXML
            TikaInputStream stream = TikaInputStream.get(
                    new DocumentInputStream((DocumentEntry) ooxml));
            try {
                ZipContainerDetector detector = new ZipContainerDetector();
                MediaType type = detector.detect(stream, new Metadata());
                handleEmbeddedResource(stream, null, type.toString(), xhtml, true);
                return;
            } finally {
                stream.close();
            }
        } catch(FileNotFoundException e) {
            // It's regular OLE2
        }

       // Need to dump the directory out to a new temp file, so
       //  it's stand-alone
       POIFSFileSystem newFS = new POIFSFileSystem();
       copy(dir, newFS.getRoot());

       File tmpFile = File.createTempFile("tika", ".ole2");
       try {
           FileOutputStream out = new FileOutputStream(tmpFile);
           newFS.writeFilesystem(out);
           out.close();

           // What kind of document is it?
           Metadata metadata = new Metadata();
           POIFSDocumentType type = POIFSDocumentType.detectType(dir);

           TikaInputStream embedded;

           if (type==POIFSDocumentType.OLE10_NATIVE) {
               Entry entry = dir.getEntry(Ole10Native.OLE10_NATIVE);
               ByteArrayOutputStream bos = new ByteArrayOutputStream();
               IOUtils.copy(new DocumentInputStream((DocumentEntry) entry), bos);
               byte[] data = bos.toByteArray();

               try {
View Full Code Here

        boolean outlookExtracted = false;
        POIFSFileSystem filesystem = new POIFSFileSystem(stream);
        Iterator<?> entries = filesystem.getRoot().getEntries();
        while (entries.hasNext()) {
            Entry entry = (Entry) entries.next();
            String name = entry.getName();
            if (!(entry instanceof DocumentEntry)) {
                // Skip directory entries
            } else if (SUMMARY_INFORMATION.equals(name)
                    || DOCUMENT_SUMMARY_INFORMATION.equals(name)) {
                parse((DocumentEntry) entry, metadata);
View Full Code Here

        {
            return new OutlookTextExtactor(poifsDir);
        }

        for (Iterator<Entry> entries = poifsDir.getEntries(); entries.hasNext();) {
            Entry entry = entries.next();

            if (entry.getName().equals("Package")) {
                OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
                return createExtractor(pkg);
            }
        }
        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
View Full Code Here

    if(ext instanceof ExcelExtractor) {
      // These are in MBD... under the root
      Iterator<Entry> it = root.getEntries();
      while(it.hasNext()) {
        Entry entry = it.next();
        if(entry.getName().startsWith("MBD")) {
          dirs.add(entry);
        }
      }
    } else if(ext instanceof WordExtractor) {
      // These are in ObjectPool -> _... under the root
      try {
        DirectoryEntry op = (DirectoryEntry)
                root.getEntry("ObjectPool");
        Iterator<Entry> it = op.getEntries();
        while(it.hasNext()) {
          Entry entry = it.next();
          if(entry.getName().startsWith("_")) {
            dirs.add(entry);
          }
        }
      } catch(FileNotFoundException e) {}
    } else if(ext instanceof PowerPointExtractor) {
View Full Code Here

    throws WritingNotSupportedException, IOException
    {
        /* If there is already an entry with the same name, remove it. */
        try
        {
            final Entry e = dir.getEntry(name);
            e.delete();
        }
        catch (FileNotFoundException ex)
        {
            /* Entry not found, no need to remove it. */
        }
View Full Code Here

        EmbeddedObjectRefSubRecord subRecord = findObjectRecord();

        int streamId = subRecord.getStreamId().intValue();
        String streamName = "MBD" + HexDump.toHex(streamId);

        Entry entry = _poifs.getRoot().getEntry(streamName);
        if (entry instanceof DirectoryEntry) {
            return (DirectoryEntry) entry;
        }
        throw new IOException("Stream " + streamName + " was not an OLE2 directory");
    }
View Full Code Here

    DirectoryEntry root = source.getRoot();
    DirectoryEntry newRoot = target.getRoot();

    Iterator<Entry> entries = root.getEntries();
    while (entries.hasNext()) {
      Entry entry = entries.next();
      if (!excepts.contains(entry.getName())) {
        copyNodeRecursively(entry,newRoot);
      }
    }
  }
View Full Code Here

  }
  /**
   * Selects a text extractor for the OLE2 document stored under the given
   * directory, based on the names of the entries directly inside it.
   *
   * <p>Entries are examined in iteration order; the first entry whose name
   * matches one of the recognised names decides which extractor is built.
   *
   * @param poifsDir the directory whose immediate entries are inspected
   * @param fs the file system passed through to the extractor constructors
   * @return a newly constructed extractor for the detected document type
   * @throws IOException declared by this method; presumably propagated from
   *         the extractor constructors (not visible here)
   * @throws IllegalArgumentException if no entry carries a recognised name
   */
  public static POIOLE2TextExtractor createExtractor(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
    // Scan the directory's entries for a name that identifies
    //  the kind of document stored here
    for(Iterator<Entry> entries = poifsDir.getEntries(); entries.hasNext(); ) {
      Entry entry = entries.next();
     
      if(entry.getName().equals("Workbook")) {
         // Use the event-based extractor when getPreferEventExtractor() says so
         if(getPreferEventExtractor()) {
               return new EventBasedExcelExtractor(poifsDir, fs);
         } else {
            return new ExcelExtractor(poifsDir, fs);
         }
      }
      if(entry.getName().equals("WordDocument")) {
          // Old or new style word document?
          // Try WordExtractor first; fall back to Word6Extractor when it
          //  reports an old file format
          try {
              return new WordExtractor(poifsDir, fs);
          } catch(OldWordFileFormatException e) {
              return new Word6Extractor(poifsDir, fs);
          }
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
         if(entry.getName().equals("Quill")) {
            return new PublisherTextExtractor(poifsDir, fs);
         }
      // Any one of these entry names selects the Outlook extractor
      //  (presumably property streams of an Outlook message - confirm)
      if(
                entry.getName().equals("__substg1.0_1000001E") ||
                entry.getName().equals("__substg1.0_1000001F") ||
                entry.getName().equals("__substg1.0_0047001E") ||
                entry.getName().equals("__substg1.0_0047001F") ||
                entry.getName().equals("__substg1.0_0037001E") ||
                entry.getName().equals("__substg1.0_0037001F")
      ) {
         return new OutlookTextExtactor(poifsDir, fs);
      }
    }
    // No entry matched any recognised document type
    throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
View Full Code Here

TOP

Related Classes of org.apache.poi.poifs.filesystem.Entry

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.