Package org.apache.tika.mime

Examples of org.apache.tika.mime.MediaType


              }
            }
            // ask Tika too
            final Metadata metadata = new Metadata();
            metadata.set(Metadata.RESOURCE_NAME_KEY, "dummy." + key);
            MediaType mediaType = detector.detect(null, metadata);
            // unravel to least specific
            while (mediaType != null) {
              detected.add(mediaType.getBaseType().toString());
              mediaType = tikaConfig.getMediaTypeRegistry().getSupertype(mediaType);
            }
            return detected;
          }
        });
View Full Code Here


    Metadata metadata = new Metadata();
    if (fileName != null) {
      metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
    }

    MediaType mediaType;
    try (final TikaInputStream tis = TikaInputStream.get(input)) {
      mediaType = detector.detect(tis, metadata);
    }

    // unravel to least specific
    while (mediaType != null) {
      detected.add(mediaType.getBaseType().toString());
      mediaType = tikaConfig.getMediaTypeRegistry().getSupertype(mediaType);
    }

    return detected;
  }
View Full Code Here

        for (Map.Entry<File, String> entry : files.entrySet()) {
            String name = entry.getValue();
            File file = entry.getKey();

            byte[] buf = IOUtils.toByteArray(new FileInputStream(file));
            MediaType mediaType = mimeTypes.
                    detect(new ByteArrayInputStream(buf), new Metadata());
            MimeType mimeType = mimeTypes.forName(mediaType.toString());
            FileBody fb = new FileBody(file, name + mimeType.getExtension(),
                    mimeType.getName(), Consts.UTF_8.name());

            multipartEntity.addPart("files[" + x + "]", fb);
            x++;
View Full Code Here

            if (name == null) {
                name = "file" + count++;
            }

            MediaType contentType = detector.detect(inputStream, metadata);

            if (name.indexOf('.')==-1 && contentType!=null) {
                try {
                    name += config.getMimeRepository().forName(
                            contentType.toString()).getExtension();
                } catch (MimeTypeException e) {
                    e.printStackTrace();
                }
            }
View Full Code Here

     * @param mimetype
     *            the Mimetype
     * @return true if the Metadata object can be converted or false if not
     */
    public static boolean isConverterAvailable(String mimetype) {
        MediaType type = MediaType.parse( mimetype );

        if (type != null) {
            return (getConverterMap().get( type ) != null);
        }

View Full Code Here

            throw new IllegalArgumentException( "mimetype must not be null" );
        }

        ITikaToXMPConverter converter = null;

        MediaType type = MediaType.parse( mimetype );

        if (type != null) {
            Class<? extends ITikaToXMPConverter> clazz = getConverterMap().get( type );
            if (clazz != null) {
                try {
                    converter = clazz.newInstance();
                }
                catch (Exception e) {
                    throw new TikaException(
                            "TikaToXMP converter class cannot be instantiated for mimetype: "
                                    + type.toString(), e );
                }
            }
        }

        return converter;
View Full Code Here

                return charset;
            }
        }

        // Try determining the encoding based on hints in document metadata
        MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
        if (type != null) {
            String charset = type.getParameters().get("charset");
            if (charset != null) {
                try {
                    return CharsetUtils.forName(charset);
                } catch (Exception e) {
                    // ignore
View Full Code Here

        for (MediaType type : registry.getTypes()) {
            System.out.println(type);
            for (MediaType alias : registry.getAliases(type)) {
                System.out.println("  alias:     " + alias);
            }
            MediaType supertype = registry.getSupertype(type);
            if (supertype != null) {
                System.out.println("  supertype: " + supertype);
            }
            Parser p = parsers.get(type);
            if (p != null) {
View Full Code Here

            TikaInputStream stream = TikaInputStream.get(
                    new DocumentInputStream((DocumentEntry) ooxml));
            try {
                ZipContainerDetector detector = new ZipContainerDetector();
                MediaType type = detector.detect(stream, new Metadata());
                handleEmbeddedResource(stream, null, type.toString(), xhtml, true);
                return;
            } finally {
                stream.close();
            }
        }

        // It's regular OLE2:

        // What kind of document is it?
        Metadata metadata = new Metadata();
        POIFSDocumentType type = POIFSDocumentType.detectType(dir);
        TikaInputStream embedded = null;

        try {
            if (type == POIFSDocumentType.OLE10_NATIVE) {
                try {
                    // Try to un-wrap the OLE10Native record:
                    Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode)dir);
                    metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel());
                   
                    byte[] data = ole.getDataBuffer();
                    embedded = TikaInputStream.get(data);
                } catch (Ole10NativeException ex) {
                    // Not a valid OLE10Native record, skip it
                }
            } else if (type == POIFSDocumentType.COMP_OBJ) {
                try {
                   // Grab the contents and process
                   DocumentEntry contentsEntry = (DocumentEntry)dir.getEntry("CONTENTS");
                   DocumentInputStream inp = new DocumentInputStream(contentsEntry);
                   byte[] contents = new byte[contentsEntry.getSize()];
                   inp.readFully(contents);
                   embedded = TikaInputStream.get(contents);
                  
                   // Try to work out what it is
                   MediaType mediaType = getDetector().detect(embedded, new Metadata());
                   String extension = type.getExtension();
                   try {
                      MimeType mimeType = getMimeTypes().forName(mediaType.toString());
                      extension = mimeType.getExtension();
                   } catch(MimeTypeException mte) {
                      // No details on this type are known
                   }
                  
                   // Record what we can do about it
                   metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
                   metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + extension);
                } catch(Exception e) {
                   throw new TikaException("Invalid embedded resource", e);
                }
            } else {
View Full Code Here

        String incomingCharset = metadata.get(Metadata.CONTENT_ENCODING);
        String incomingType = metadata.get(Metadata.CONTENT_TYPE);
        if (incomingCharset == null && incomingType != null) {
            // TIKA-341: Use charset in content-type
            MediaType mt = MediaType.parse(incomingType);
            if (mt != null) {
                incomingCharset = mt.getParameters().get("charset");
            }
        }

        if (incomingCharset != null) {
            detector.setDeclaredEncoding(CharsetUtils.clean(incomingCharset));
View Full Code Here

TOP

Related Classes of org.apache.tika.mime.MediaType

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.