Package org.apache.nutch.util

Examples of org.apache.nutch.util.ObjectCache


  public URLNormalizers(Configuration conf, String scope) {
    this.conf = conf;
    this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
            URLNormalizer.X_POINT_ID);
    ObjectCache objectCache = ObjectCache.get(conf);
   
    if (this.extensionPoint == null) {
      throw new RuntimeException("x point " + URLNormalizer.X_POINT_ID
              + " not found.");
    }

    normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + scope);
    if (normalizers == null) {
      normalizers = getURLNormalizers(scope);
    }
    if (normalizers == EMPTY_NORMALIZERS) {
      normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
      if (normalizers == null) {
        normalizers = getURLNormalizers(SCOPE_DEFAULT);
      }
    }
   
View Full Code Here


   *         scope.
   * @throws PluginRuntimeException
   */
  URLNormalizer[] getURLNormalizers(String scope) {
    List<Extension> extensions = getExtensions(scope);
    ObjectCache objectCache = ObjectCache.get(conf);
   
    if (extensions == EMPTY_EXTENSION_LIST) {
      return EMPTY_NORMALIZERS;
    }
   
    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size());

    Iterator<Extension> it = extensions.iterator();
    while (it.hasNext()) {
      Extension ext = it.next();
      URLNormalizer normalizer = null;
      try {
        // check to see if we've cached this URLNormalizer instance yet
        normalizer = (URLNormalizer) objectCache.getObject(ext.getId());
        if (normalizer == null) {
          // go ahead and instantiate it and then cache it
          normalizer = (URLNormalizer) ext.getExtensionInstance();
          objectCache.setObject(ext.getId(), normalizer);
        }
        normalizers.add(normalizer);
      } catch (PluginRuntimeException e) {
        e.printStackTrace();
        LOG.warn("URLNormalizers:PluginRuntimeException when "
View Full Code Here

  private ExtensionPoint extensionPoint;
  private ParsePluginList parsePluginList;

  public ParserFactory(Configuration conf) {
    this.conf = conf;
    ObjectCache objectCache = ObjectCache.get(conf);
    this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
        Parser.X_POINT_ID);
    this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
    if (this.parsePluginList == null) {
      this.parsePluginList = new ParsePluginsReader().parse(conf);
      objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
    }

    if (this.extensionPoint == null) {
      throw new RuntimeException("x point " + Parser.X_POINT_ID + " not found.");
    }
View Full Code Here

   * @return a list of extensions to be used for this scope. If none, returns
   *         empty list.
   * @throws PluginRuntimeException
   */
  private List<Extension> getExtensions(String scope) {
    ObjectCache objectCache = ObjectCache.get(conf);
    List<Extension> extensions =
      (List<Extension>) objectCache.getObject(URLNormalizer.X_POINT_ID + "_x_"
                                                + scope);

    // Just compare the reference:
    // if this is the empty list, we know we will find no extension.
    if (extensions == EMPTY_EXTENSION_LIST) {
      return EMPTY_EXTENSION_LIST;
    }

    if (extensions == null) {
      extensions = findExtensions(scope);
      if (extensions != null) {
        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, extensions);
      } else {
        // Put the empty extension list into cache
        // to remember we don't know any related extension.
        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, EMPTY_EXTENSION_LIST);
        extensions = EMPTY_EXTENSION_LIST;
      }
    }
    return extensions;
  }
View Full Code Here

  throws ParserNotFound {
   
    List<Parser> parsers = null;
    List<Extension> parserExts = null;
   
    ObjectCache objectCache = ObjectCache.get(conf);
   
    // TODO once the MimeTypes is available
    // parsers = getExtensions(MimeUtils.map(contentType));
    // if (parsers != null) {
    //   return parsers;
    // }
    // Last Chance: Guess content-type from file url...
    // parsers = getExtensions(MimeUtils.getMimeType(url));

    parserExts = getExtensions(contentType);
    if (parserExts == null) {
      throw new ParserNotFound(url, contentType);
    }

    parsers = new Vector<Parser>(parserExts.size());
    for (Iterator i=parserExts.iterator(); i.hasNext(); ){
      Extension ext = (Extension) i.next();
      Parser p = null;
      try {
        //check to see if we've cached this parser instance yet
        p = (Parser) objectCache.getObject(ext.getId());
        if (p == null) {
          // go ahead and instantiate it and then cache it
          p = (Parser) ext.getExtensionInstance();
          objectCache.setObject(ext.getId(),p);
        }
        parsers.add(p);
      } catch (PluginRuntimeException e) {
        if (LOG.isWarnEnabled()) {
          e.printStackTrace(LogUtil.getWarnStream(LOG));
View Full Code Here

  public Parser getParserById(String id) throws ParserNotFound {

    Extension[] extensions = this.extensionPoint.getExtensions();
    Extension parserExt = null;

    ObjectCache objectCache = ObjectCache.get(conf);
   
    if (id != null) {
      parserExt = getExtension(extensions, id);
    }
    if (parserExt == null) {
      parserExt = getExtensionFromAlias(extensions, id);
    }

    if (parserExt == null) {
      throw new ParserNotFound("No Parser Found for id [" + id + "]");
    }
   
    // first check the cache          
    if (objectCache.getObject(parserExt.getId()) != null) {
      return (Parser) objectCache.getObject(parserExt.getId());

    // if not found in cache, instantiate the Parser   
    } else {
      try {
        Parser p = (Parser) parserExt.getExtensionInstance();
        objectCache.setObject(parserExt.getId(), p);
        return p;
      } catch (PluginRuntimeException e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Canno initialize parser " +
                   parserExt.getDescriptor().getPluginId() +
View Full Code Here

   * @return a list of extensions to be used for this contentType.
   *         If none, returns <code>null</code>.
   */
  protected List<Extension> getExtensions(String contentType) {
   
    ObjectCache objectCache = ObjectCache.get(conf);
    // First of all, tries to clean the content-type
    String type = null;
    type = MimeUtil.cleanMimeType(contentType);


    List<Extension> extensions = (List<Extension>) objectCache.getObject(type);

    // Just compare the reference:
    // if this is the empty list, we know we will find no extension.
    if (extensions == EMPTY_EXTENSION_LIST) {
      return null;
    }
   
    if (extensions == null) {
      extensions = findExtensions(type);
      if (extensions != null) {
        objectCache.setObject(type, extensions);
      } else {
        // Put the empty extension list into cache
        // to remember we don't know any related extension.
        objectCache.setObject(type, EMPTY_EXTENSION_LIST);
      }
    }
    return extensions;
  }
View Full Code Here

  private ExtensionPoint extensionPoint;
  private ParsePluginList parsePluginList;

  public ParserFactory(Configuration conf) {
    this.conf = conf;
    ObjectCache objectCache = ObjectCache.get(conf);
    this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
        Parser.X_POINT_ID);
    this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
    if (this.parsePluginList == null) {
      this.parsePluginList = new ParsePluginsReader().parse(conf);
      objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
    }

    if (this.extensionPoint == null) {
      throw new RuntimeException("x point " + Parser.X_POINT_ID + " not found.");
    }
View Full Code Here

  throws ParserNotFound {
   
    List<Parser> parsers = null;
    List<Extension> parserExts = null;
   
    ObjectCache objectCache = ObjectCache.get(conf);
   
    // TODO once the MimeTypes is available
    // parsers = getExtensions(MimeUtils.map(contentType));
    // if (parsers != null) {
    //   return parsers;
    // }
    // Last Chance: Guess content-type from file url...
    // parsers = getExtensions(MimeUtils.getMimeType(url));

    parserExts = getExtensions(contentType);
    if (parserExts == null) {
      throw new ParserNotFound(url, contentType);
    }

    parsers = new Vector<Parser>(parserExts.size());
    for (Iterator i=parserExts.iterator(); i.hasNext(); ){
      Extension ext = (Extension) i.next();
      Parser p = null;
      try {
        //check to see if we've cached this parser instance yet
        p = (Parser) objectCache.getObject(ext.getId());
        if (p == null) {
          // go ahead and instantiate it and then cache it
          p = (Parser) ext.getExtensionInstance();
          objectCache.setObject(ext.getId(),p);
        }
        parsers.add(p);
      } catch (PluginRuntimeException e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("ParserFactory:PluginRuntimeException when "
View Full Code Here

  public Parser getParserById(String id) throws ParserNotFound {

    Extension[] extensions = this.extensionPoint.getExtensions();
    Extension parserExt = null;

    ObjectCache objectCache = ObjectCache.get(conf);
   
    if (id != null) {
      parserExt = getExtension(extensions, id);
    }
    if (parserExt == null) {
      parserExt = getExtensionFromAlias(extensions, id);
    }

    if (parserExt == null) {
      throw new ParserNotFound("No Parser Found for id [" + id + "]");
    }
   
    // first check the cache          
    if (objectCache.getObject(parserExt.getId()) != null) {
      return (Parser) objectCache.getObject(parserExt.getId());

    // if not found in cache, instantiate the Parser   
    } else {
      try {
        Parser p = (Parser) parserExt.getExtensionInstance();
        objectCache.setObject(parserExt.getId(), p);
        return p;
      } catch (PluginRuntimeException e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Canno initialize parser " +
                   parserExt.getDescriptor().getPluginId() +
View Full Code Here

TOP

Related Classes of org.apache.nutch.util.ObjectCache

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.