Package org.apache.tika.mime

Examples of org.apache.tika.mime.MimeType$RootXML


   * @param data
   *          The byte data, returned from the crawl, if any.
   * @return The correctly, automatically guessed {@link MimeType} name.
   */
  public String autoResolveContentType(String typeName, String url, byte[] data) {
    MimeType type = null;
    String cleanedMimeType = null;

    try {
      cleanedMimeType = MimeUtil.cleanMimeType(typeName) != null ? this.mimeTypes
          .forName(MimeUtil.cleanMimeType(typeName)).getName()
          : null;
    } catch (MimeTypeException mte) {
      // Seems to be a malformed mime type name...
    }

    // first try to get the type from the cleaned type name
    try {
      type = cleanedMimeType != null ? this.mimeTypes.forName(cleanedMimeType)
          : null;
    } catch (MimeTypeException e) {
      type = null;
    }

    // if returned null, or if it's the default type then try url resolution
    if (type == null
        || (type != null && type.getName().equals(MimeTypes.DEFAULT))) {
      // If no mime-type header, or cannot find a corresponding registered
      // mime-type, then guess a mime-type from the url pattern
      type = this.mimeTypes.getMimeType(url) != null ? this.mimeTypes
          .getMimeType(url) : type;
    }

    // if magic is enabled use mime magic to guess if the mime type returned
    // from the magic guess is different than the one that's already set so far
    // if it is, and it's not the default mime type, then go with the mime type
    // returned by the magic
    if (this.mimeMagic) {
      MimeType magicType = this.mimeTypes.getMimeType(data);
      if (magicType != null && !magicType.getName().equals(MimeTypes.DEFAULT)
          && type != null && !type.getName().equals(magicType.getName())) {
        // If magic enabled and the current mime type differs from that of the
        // one returned from the magic, take the magic mimeType
        type = magicType;
      }

View Full Code Here


        // Determines the MIMEType based on Content-Type hint if available.
        final String contentType = metadata.get(Metadata.CONTENT_TYPE);
        String candidateMIMEType = null;
        if (contentType != null) {
            try {
                MimeType type = types.forName(contentType);
                if (type != null) {
                    if( ! isPlainMIMEType(type.getName()) ) {
                        return type.getName();
                    } else {
                        candidateMIMEType = type.getName();
                    }
                }
            }
            catch (MimeTypeException mte) {
                // Malformed content-type value, ignore.
            }
        }

        // Determines the MIMEType based on resource name hint if available.
        final String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
        if (resourceName != null) {
            MimeType type = types.getMimeType(resourceName);
            if (type != null) {
                return type.getName();
            }
        }

        // Finally, use the default type if no matches found
        if(candidateMIMEType != null) {
View Full Code Here

   * @param data
   * @param url
   * @return
   */
  private NutchDocument addType(NutchDocument doc, ParseData data, String url) {
    MimeType mimeType = null;
    String contentType = data.getMeta(Response.CONTENT_TYPE);
    if (contentType == null) {
      // Note by Jerome Charron on 20050415:
      // Content Type not solved by a previous plugin
      // Or unable to solve it... Trying to find it
      // Should be better to use the doc content too
      // (using MimeTypes.getMimeType(byte[], String), but I don't know
      // which field it is?
      // if (MAGIC) {
      //   contentType = MIME.getMimeType(url, content);
      // } else {
      //   contentType = MIME.getMimeType(url);
      // }
      mimeType = MIME.getMimeType(url);
    } else {
      mimeType = MIME.forName(MimeUtil.cleanMimeType(contentType));
    }
       
    // Checks if we solved the content-type.
    if (mimeType == null) {
      return doc;
    }

    contentType = mimeType.getName();
   
    doc.add("type", contentType);

    String[] parts = getParts(contentType);

View Full Code Here

    // set headers
    headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
    headers.set(Response.LAST_MODIFIED, HttpDateFormat.toString(f
        .lastModified()));
   
    MimeType mimeType = MIME.getMimeType(f);
    String mimeTypeString = mimeType != null ? mimeType.getName() : "";
    headers.set(Response.CONTENT_TYPE, mimeTypeString);

    // response code
    this.code = 200; // http OK
  }
View Full Code Here

    private static String getFileExtension(String mimeType) {
        if (StringUtils.isEmpty(mimeType)) return "";
        MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
        try {
            MimeType type = allTypes.forName(mimeType);
            return type.getExtension();
        }
        catch (MimeTypeException e) {
            return "";
        }
    }
View Full Code Here

        if (!stream.markSupported()) {
            stream = new BufferedInputStream(stream);
        }

        // Automatically detect the MIME type of the document
        MimeType type = getMimeType(stream, metadata);
        metadata.set(Metadata.CONTENT_TYPE, type.getName());

        // Parse the document
        super.parse(stream, handler, metadata);
    }
View Full Code Here

            throws IOException {
        // Get type based on magic prefix
        stream.mark(types.getMinLength());
        try {
            byte[] prefix = getPrefix(stream, types.getMinLength());
            MimeType type = types.getMimeType(prefix);
            if (type != null) {
                return type;
            }
        } finally {
            stream.reset();
        }

        // Get type based on resourceName hint (if available)
        String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
        if (resourceName != null) {
            MimeType type = types.getMimeType(resourceName);
            if (type != null) {
                return type;
            }
        }
View Full Code Here

            // using the original content
            if (mimeType == null | forceMTDetection) {
                if (inputDoc.getContent() != null) {
                    Metadata meta = new Metadata();
                    meta.set(Metadata.RESOURCE_NAME_KEY, inputDoc.getUrl());
                    MimeType mimetype = null;
                    try {
                        MediaType mediaType = detector
                                .detect(new ByteArrayInputStream(inputDoc
                                        .getContent()), meta);
                        mimetype = mimetypes.forName(mediaType.getType() + "/"
                                + mediaType.getSubtype());
                    } catch (IOException e) {
                        LOG.error("Exception", e);
                    } catch (MimeTypeException e) {
                        LOG.error("Exception", e);
                    }
                    mt = mimetype.getName();
                } else if (mimeType == null && inputDoc.getText() != null) {
                    // force it to text
                    mt = "text/plain";
                }
            } else {
View Full Code Here

   * @return The correctly, automatically guessed {@link MimeType} name.
   */
  public String autoResolveContentType(String typeName, String url, byte[] data) {
    String retType = null;
    String magicType = null;
    MimeType type = null;
    String cleanedMimeType = null;

    try {
      cleanedMimeType = MimeUtil.cleanMimeType(typeName) != null ? this.mimeTypes
          .forName(MimeUtil.cleanMimeType(typeName)).getName()
          : null;
    } catch (MimeTypeException mte) {
      // Seems to be a malformed mime type name...
    }

    // first try to get the type from the cleaned type name
    try {
      type = cleanedMimeType != null ? this.mimeTypes.forName(cleanedMimeType)
          : null;
    } catch (MimeTypeException e) {
      type = null;
    }

    // if returned null, or if it's the default type then try url resolution
    if (type == null
        || (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
      // If no mime-type header, or cannot find a corresponding registered
      // mime-type, then guess a mime-type from the url pattern
      try {
        TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
        Tika tika = new Tika(tikaConfig);
        retType = tika.detect(url) != null ? tika.detect(url) : null;
      } catch (Exception e) {
        String message = "Problem loading default Tika configuration";
        LOG.error(message, e);
        throw new RuntimeException(e);
      }
    } else {
        retType = type.getName();
    }

    // if magic is enabled use mime magic to guess if the mime type returned
    // from the magic guess is different than the one that's already set so far
    // if it is, and it's not the default mime type, then go with the mime type
View Full Code Here

    // set headers
    headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
    headers.set(Response.LAST_MODIFIED,
        HttpDateFormat.toString(f.lastModified()));

    MimeType mimeType = MIME.getMimeType(f);
    String mimeTypeString = mimeType != null ? mimeType.getName() : "";
    headers.set(Response.CONTENT_TYPE, mimeTypeString);

    // response code
    this.code = 200; // http OK
  }
View Full Code Here

TOP

Related Classes of org.apache.tika.mime.MimeType$RootXML

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.