Package org.apache.nutch.util.mime

Examples of org.apache.nutch.util.mime.MimeType$Magic


    return doc;
  }

  // Add Content-Type and its primaryType and subType
  private Document addType(Document doc, ParseData data, String url) {
    MimeType mimeType = null;
    String contentType = data.getMeta(Response.CONTENT_TYPE);
    if (contentType == null) {
  // Note by Jerome Charron on 20050415:
        // Content Type not solved by a previous plugin
        // Or unable to solve it... Trying to find it
        // Should be better to use the doc content too
        // (using MimeTypes.getMimeType(byte[], String), but I don't know
        // which field it is?
        // if (MAGIC) {
        //   contentType = MIME.getMimeType(url, content);
        // } else {
        //   contentType = MIME.getMimeType(url);
        // }
        mimeType = MIME.getMimeType(url);
    } else {
        try {
            mimeType = new MimeType(contentType);
        } catch (MimeTypeException e) {
            if (LOG.isWarnEnabled()) { LOG.warn(url + e.toString()); }
            mimeType = null;
        }
    }
       
    // Checks if we solved the content-type.
    if (mimeType == null) {
      return doc;
    }

    contentType = mimeType.getName();
    String primaryType = mimeType.getPrimaryType();
    String subType = mimeType.getSubType();
    // leave this for future improvement
    //MimeTypeParameterList parameterList = mimeType.getParameters()

    // add contentType, primaryType and subType to field "type"
    // as un-stored, indexed and un-tokenized, so that search results
View Full Code Here


    // set headers
    headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
    headers.set(Response.LAST_MODIFIED, this.file.httpDateFormat.toString(f
        .lastModified()));
    MimeTypes mimeTypes = MimeTypes.get(conf.get("mime.types.file"));
    MimeType mimeType = mimeTypes.getMimeType(f);
    String mimeTypeString = mimeType != null ? mimeType.getName() : "";
    headers.set(Response.CONTENT_TYPE, mimeTypeString);

    // response code
    this.code = 200; // http OK
  }
View Full Code Here

      fs.close();
    }
  }

  private String getContentType(String typeName, String url, byte[] data) {
    MimeType type = null;
    try {
      typeName = MimeType.clean(typeName);
      type = typeName == null ? null : this.mimeTypes.forName(typeName);
    } catch (MimeTypeException mte) {
      // Seems to be a malformed mime type name...
    }

    if (typeName == null || type == null || !type.matches(url)) {
      // If no mime-type header, or cannot find a corresponding registered
      // mime-type, or the one found doesn't match the url pattern
      // it shouldbe, then guess a mime-type from the url pattern
      type = this.mimeTypes.getMimeType(url);
      typeName = type == null ? typeName : type.getName();
    }
    if (typeName == null || type == null
        || (this.mimeTypeMagic && type.hasMagic() && !type.matches(data))) {
      // If no mime-type already found, or the one found doesn't match
      // the magic bytes it should be, then, guess a mime-type from the
      // document content (magic bytes)
      type = this.mimeTypes.getMimeType(data);
      typeName = type == null ? typeName : type.getName();
    }
    return typeName;
  }
View Full Code Here

  }

  public Content toContent() {
    String contentType = getHeader("Content-Type");
    if (contentType == null) {
      MimeType type = null;
      if (MAGIC) {
        type = MIME.getMimeType(orig, content);
      } else {
        type = MIME.getMimeType(orig);
      }
      if (type != null) {
          contentType = type.getName();
      } else {
          contentType = "";
      }
    }
    if (content == null) content = EMPTY_CONTENT;
View Full Code Here

  public byte[] getContent() { return content; }

  public Content toContent() {
    String contentType = getHeader("Content-Type");
    if (contentType == null) {
      MimeType type = null;
      if (MAGIC) {
        type = MIME.getMimeType(orig, content);
      } else {
        type = MIME.getMimeType(orig);
      }
      if (type != null) {
          contentType = type.getName();
      } else {
          contentType = "";
      }
    }
    return new Content(orig, base, content, contentType, headers);
View Full Code Here

    hdrs.put("Content-Length", new Long(size).toString());

    hdrs.put("Last-Modified",
      this.file.httpDateFormat.toString(f.lastModified()));

    MimeType contentType = null;
    if (MAGIC) {
      contentType = MIME.getMimeType(f.getName(), this.content);
    } else {
      contentType = MIME.getMimeType(f.getName());
    }
    if (contentType != null) {
        hdrs.put("Content-Type", contentType.getName());
    }
    this.headers.putAll(hdrs);

    // response code
    this.code = 200; // http OK
View Full Code Here

      //this.headers.put("content-type", "text/html");
      this.headers.put("Last-Modified",
        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
      this.content = os.toByteArray();

      MimeType contentType = null;
      if (MAGIC) {
        contentType = MIME.getMimeType(path, this.content);
      } else {
        contentType = MIME.getMimeType(path);
      }
      if (contentType != null) {
        this.headers.put("Content-Type", contentType.getName());
      }

//      // approximate bytes sent and read
//      if (this.httpAccounting != null) {
//        this.httpAccounting.incrementBytesSent(path.length());
//        this.httpAccounting.incrementBytesRead(this.content.length);
//      }

      this.code = 200; // http OK

    } catch (FtpExceptionControlClosedByForcedDataClose e) {

      // control connection is off, clean up
      // ftp.client.disconnect();
      if (ftp.followTalk)
        Ftp.LOG.info("delete client because server cut off control channel: "+e);
      ftp.client = null;

      // in case this FtpExceptionControlClosedByForcedDataClose is
      // thrown by retrieveList() (not retrieveFile()) above,
      if (os == null) { // indicating throwing by retrieveList()
        //throw new FtpException("fail to get attibutes: "+path);
        Ftp.LOG.warning(
            "Please try larger maxContentLength for ftp.client.retrieveList(). "
          + e);
        // in a way, this is our request fault
        this.code = 400// http Bad request
        return;
      }

      FTPFile ftpFile = (FTPFile) list.get(0);
      this.headers.put("Content-Length",
        new Long(ftpFile.getSize()).toString());
      //this.headers.put("content-type", "text/html");
      this.headers.put("Last-Modified",
        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
      this.content = os.toByteArray();

      MimeType contentType = null;
      if (MAGIC) {
        contentType = MIME.getMimeType(path, this.content);
      } else {
        contentType = MIME.getMimeType(path);
      }
      if (contentType != null) {
        this.headers.put("Content-Type", contentType.getName());
      }

//      // approximate bytes sent and read
//      if (this.httpAccounting != null) {
//        this.httpAccounting.incrementBytesSent(path.length());
View Full Code Here

    return doc;
  }

  // Add Content-Type and its primaryType and subType
  private Document addType(Document doc, Properties metaData, String url) {
    MimeType mimeType = null;
    String contentType = metaData.getProperty("content-type");
    if (contentType == null) {
  // Note by Jerome Charron on 20050415:
        // Content Type not solved by a previous plugin
        // Or unable to solve it... Trying to find it
        // Should be better to use the doc content too
        // (using MimeTypes.getMimeType(byte[], String), but I don't know
        // which field it is?
        // if (MAGIC) {
        //   contentType = MIME.getMimeType(url, content);
        // } else {
        //   contentType = MIME.getMimeType(url);
        // }
        mimeType = MIME.getMimeType(url);
    } else {
        try {
            mimeType = new MimeType(contentType);
        } catch (MimeTypeException e) {
            LOG.warning(url + e.toString());
            mimeType = null;
        }
    }
       
    // Checks if we solved the content-type.
    if (mimeType == null) {
      return doc;
    }

    contentType = mimeType.getName();
    String primaryType = mimeType.getPrimaryType();
    String subType = mimeType.getSubType();
    // leave this for future improvement
    //MimeTypeParameterList parameterList = mimeType.getParameters()

    // add contentType, primaryType and subType to field "type"
    // as un-stored, indexed and un-tokenized, so that search results
View Full Code Here

    return doc;
  }

  // Add Content-Type and its primaryType and subType
  private Document addType(Document doc, ParseData data, String url) {
    MimeType mimeType = null;
    String contentType = data.getMeta(Response.CONTENT_TYPE);
    if (contentType == null) {
  // Note by Jerome Charron on 20050415:
        // Content Type not solved by a previous plugin
        // Or unable to solve it... Trying to find it
        // Should be better to use the doc content too
        // (using MimeTypes.getMimeType(byte[], String), but I don't know
        // which field it is?
        // if (MAGIC) {
        //   contentType = MIME.getMimeType(url, content);
        // } else {
        //   contentType = MIME.getMimeType(url);
        // }
        mimeType = MIME.getMimeType(url);
    } else {
        try {
            mimeType = new MimeType(contentType);
        } catch (MimeTypeException e) {
            if (LOG.isWarnEnabled()) { LOG.warn(url + e.toString()); }
            mimeType = null;
        }
    }
       
    // Checks if we solved the content-type.
    if (mimeType == null) {
      return doc;
    }

    contentType = mimeType.getName();
    String primaryType = mimeType.getPrimaryType();
    String subType = mimeType.getSubType();
    // leave this for future improvement
    //MimeTypeParameterList parameterList = mimeType.getParameters()

    // add contentType, primaryType and subType to field "type"
    // as un-stored, indexed and un-tokenized, so that search results
View Full Code Here

      fs.close();
    }
  }

  private String getContentType(String typeName, String url, byte[] data) {
    MimeType type = null;
    try {
        typeName = MimeType.clean(typeName);
        type = typeName == null ? null : this.mimeTypes.forName(typeName);
    } catch (MimeTypeException mte) {
        // Seems to be a malformed mime type name...
    }

    if (typeName == null || type == null || !type.matches(url)) {
      // If no mime-type header, or cannot find a corresponding registered
      // mime-type, or the one found doesn't match the url pattern
      // it shouldbe, then guess a mime-type from the url pattern
      type = this.mimeTypes.getMimeType(url);
      typeName = type == null ? typeName : type.getName();
    }
    if (typeName == null || type == null ||
        (this.mimeTypeMagic && type.hasMagic() && !type.matches(data))) {
      // If no mime-type already found, or the one found doesn't match
      // the magic bytes it should be, then, guess a mime-type from the
      // document content (magic bytes)
      type = this.mimeTypes.getMimeType(data);
      typeName = type == null ? typeName : type.getName();
    }
    return typeName;
  }
View Full Code Here

TOP

Related Classes of org.apache.nutch.util.mime.MimeType$Magic

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.