Package org.apache.nutch.util

Examples of org.apache.nutch.util.MimeUtil


    return doc;
  }

  /**
   * Stores the given configuration and creates a fresh {@link MimeUtil}
   * from it.
   *
   * @param conf configuration to keep on this object and to pass to the
   *             {@code MimeUtil} constructor
   */
  public void setConf(Configuration conf) {
    this.conf = conf;
    // Rebuilt on every call, so MIME is always constructed from the conf just set.
    MIME = new MimeUtil(conf);
  }
View Full Code Here


 
 
  /**
   * Creates a new instance of ZipTextExtractor.
   *
   * @param conf configuration stored on this instance and used to construct
   *             its {@link MimeUtil}
   */
  public ZipTextExtractor(Configuration conf) {
    this.conf = conf;
    this.MIME = new MimeUtil(conf);
  }
View Full Code Here

    this.url = url;
    this.base = base;
    this.content = content;
    this.metadata = metadata;

    this.mimeTypes = new MimeUtil(conf);
    this.contentType = getContentType(contentType, url, content);
  }
View Full Code Here

    this.orig = url.toString();
    this.base = url.toString();
    this.file = file;
    this.conf = conf;
   
    MIME = new MimeUtil(conf);
    tika = new Tika();

    if (!"file".equals(url.getProtocol()))
      throw new FileException("Not a file url:" + url);
View Full Code Here

    return doc;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
    MIME = new MimeUtil(conf);

    if (conf.getBoolean("moreIndexingFilter.mapMimeTypes", false) == true) {
      mapMimes = true;

      // Load the mapping
View Full Code Here

 
 
  /**
   * Creates a new instance of ZipTextExtractor.
   *
   * @param conf configuration stored on this instance and used to construct
   *             its {@link MimeUtil}
   */
  public ZipTextExtractor(Configuration conf) {
    this.conf = conf;
    this.MIME = new MimeUtil(conf);
  }
View Full Code Here

    this.url = url;
    this.base = base;
    this.content = content;
    this.metadata = metadata;

    this.mimeTypes = new MimeUtil(conf);
    this.contentType = getContentType(contentType, url, content);
  }
View Full Code Here

  Parse parse;
  WebPage page = new WebPage();
  page.setBaseUrl(new Utf8("file:"+urlString));
  page.setContent(ByteBuffer.wrap(bytes));
  // set the content type?
  MimeUtil mimeutil = new MimeUtil(conf);
  String mtype = mimeutil.getMimeType(file);
  page.setContentType(new Utf8(mtype));
   
  parse = new ParseUtil(conf).parse("file:"+urlString, page);
  return parse.getText();
    }
View Full Code Here

    Configuration conf = NutchConfiguration.create();

    WebPage page = new WebPage();
    page.setBaseUrl(new Utf8(url));
    page.setContent(ByteBuffer.wrap(bytes));
    MimeUtil mimeutil = new MimeUtil(conf);
    String mtype = mimeutil.getMimeType(file);
    page.setContentType(new Utf8(mtype));

    new ParseUtil(conf).parse(url, page);

    ByteBuffer bb = page.getFromMetadata(new Utf8("License-Url"));
View Full Code Here

    this.orig = url.toString();
    this.base = url.toString();
    this.file = file;
    this.conf = conf;
   
    MIME = new MimeUtil(conf);

    if (!"file".equals(url.getProtocol()))
      throw new FileException("Not a file url:" + url);

    if (File.LOG.isTraceEnabled()) {
View Full Code Here

TOP

Related Classes of org.apache.nutch.util.MimeUtil

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.