Package org.apache.nutch.util.mime

Examples of org.apache.nutch.util.mime.MimeType


                  mimetype = value.toLowerCase().
                  replaceAll(WHITESPACE,"-");
                  if (mimetype == null) {
                    mimetype = "no-type";
                  }
                  new MimeType(value.toLowerCase());
                } catch (MimeTypeException e) {
                  mimetype = "no-type";
                }
                if (skip(mimetype)) { //XXX
                }
View Full Code Here


      fs.close();
    }
  }

  private String getContentType(String typeName, String url, byte[] data) {
    MimeType type = null;
    try {
      typeName = MimeType.clean(typeName);
      type = typeName == null ? null : this.mimeTypes.forName(typeName);
    } catch (MimeTypeException mte) {
      // Seems to be a malformed mime type name...
    }

    if (typeName == null || type == null || !type.matches(url)) {
      // If no mime-type header, or cannot find a corresponding registered
      // mime-type, or the one found doesn't match the url pattern
      // it shouldbe, then guess a mime-type from the url pattern
      type = this.mimeTypes.getMimeType(url);
      typeName = type == null ? typeName : type.getName();
    }
    if (typeName == null || type == null
        || (this.mimeTypeMagic && type.hasMagic() && !type.matches(data))) {
      // If no mime-type already found, or the one found doesn't match
      // the magic bytes it should be, then, guess a mime-type from the
      // document content (magic bytes)
      type = this.mimeTypes.getMimeType(data);
      typeName = type == null ? typeName : type.getName();
    }
    return typeName;
  }
View Full Code Here

    add(url, doc, ARC_NAME, data.getMeta(ARC_NAME), false, true, false, false);
         
        // add mimetype
        String mimetype = data.getMeta(CONTENT_TYPE_KEY);     
        if (mimetype == null || mimetype.length() == 0) {
          MimeType mt = (MIME.getMimeType(url));       
          if (mt != null) {
            mimetype = mt.getName();
          }
        }
     
        try
        {
          // Test the mimetype makes some sense. If not, don't add.
          mimetype = (new MimeType(mimetype)).getName();
        }
        catch (MimeTypeException e) {
          LOG.error(url + ", mimetype " + mimetype + ": " + e.toString());
          // Clear mimetype because caused exception.
          mimetype = null;
View Full Code Here

    // metadata with a key of 'content-type'.
    String mimetype = parse.getData().getMeta(CONTENT_TYPE_KEY);
   
    if (mimetype == null || mimetype.length() == 0)
    {
      MimeType mt = (MIME.getMimeType(urlStr));
     
      if (mt != null)
      {
        mimetype = mt.getName();
      }
    }
   
    try
    {
      // Test the mimetype makes some sense. If not, don't add.
      mimetype = (new MimeType(mimetype)).getName();
    }
    catch (MimeTypeException e)
    {
      LOGGER.error(urlStr + ", mimetype " + mimetype + ": "
        + e.toString());
View Full Code Here

    // Read in first block. If mimetype still null, look for MAGIC.
    int len = rec.read(this.buffer, 0, this.buffer.length);
   
    if (mimetype == null)
    {
      MimeType mt = this.mimeTypes.getMimeType(this.buffer);
     
      if (mt == null || mt.getName() == null)
      {
        LOG.warn("Failed to get mimetype for: " + url);
       
        return;
      }
     
      mimetype = mt.getName();
    }
   
    metaData.set(ImportArcs.CONTENT_TYPE_KEY, mimetype);

    // How much do we read total? If pdf, we will read more. If equal to -1,
View Full Code Here

      return checkMimetype(mimetype.toLowerCase());
    }
   
    if (mts != null && url != null)
    {
      final MimeType mt = mts.getMimeType(url);
     
      if (mt != null)
      {
        return checkMimetype(mt.getName().toLowerCase());
      }
    }
   
    return null;
  }
View Full Code Here

    }

    // Test the mimetype makes sense. If not, clear it.
    try
    {
      new MimeType(mimetype);
    }
    catch (final MimeTypeException e)
    {
      mimetype = null;
    }
View Full Code Here

    // Read in first block. If mimetype still null, look for MAGIC.
    int len = rec.read(this.buffer, 0, this.buffer.length);
    // check mimetype
    if (mimetype == null) {
      MimeType mt = this.mimeTypes.getMimeType(this.buffer);

      if (mt == null || mt.getName() == null) {
        LOG.warn("ProcessArcs" + "Failed to get mimetype for: " + url);

        return;
      }

      mimetype = mt.getName();
    }
   
    // filter documents
    if (filter(mimetype)) {
      return;
View Full Code Here

    if (mimetype != null && mimetype.length() > 0) {
      return checkMimetype(mimetype.toLowerCase());
    }

    if (mts != null && url != null) {
      final MimeType mt = mts.getMimeType(url);

      if (mt != null) {
        return checkMimetype(mt.getName().toLowerCase());
      }
    }

    return null;
  }
View Full Code Here

      return null;
    }

    // Test the mimetype makes sense. If not, clear it.
    try {
      new MimeType(mimetype);
    } catch (final MimeTypeException e) {
      mimetype = null;
    }

    return mimetype;
View Full Code Here

TOP

Related Classes of org.apache.nutch.util.mime.MimeType

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.