Package com.ikanow.infinit.e.harvest.utils

Examples of com.ikanow.infinit.e.harvest.utils.PropertiesManager


      catch (Exception e){}
    }   
   
    // DEFAULT CONFIGURATION
   
    PropertiesManager properties = new PropertiesManager();
   
    try {
      if (null == bWriteMetadata) { // (ie not per source)
        bWriteMetadata = properties.getExtractionCapabilityEnabled(getName(), "store_raw_events");     
      }
    }
    catch (Exception e) {}

    // ACTUALLY DO CONFIGURATION
View Full Code Here


      catch (Exception e){}
     
    }
    // DEFAULT CONFIGURATION
   
    PropertiesManager properties = new PropertiesManager();
   
    // 1] POST PROC
   
    if (-1 == nPostProc) { // (ie no per source config)
      try {
        nPostProc = properties.getAlchemyPostProcessingSetting();       
      }   
      catch (Exception e) {
        nPostProc = -1;
      }    
    }
    // 2] SENTIMENT
   
    try {
      if (null == bSentimentEnabled) { // (ie not per source)
        bSentimentEnabled = properties.getExtractionCapabilityEnabled(getName(), "sentiment");     
      }
    }
    catch (Exception e) {}
   
    // 3] CONCEPTS
   
    try {
      if (null == bConceptsEnabled) { // (ie not per source)
        bConceptsEnabled = properties.getExtractionCapabilityEnabled(getName(), "concepts");     
      }
    }
    catch (Exception e) {}

    // ACTUALLY DO CONFIG
View Full Code Here

   * Default Constructor
   */
  public DatabaseHarvester()
  {     
    sourceTypesCanHarvest.add(InfiniteEnums.DATABASE);
    PropertiesManager pm = new PropertiesManager();
    maxDocsPerCycle = pm.getMaxDocsPerSource();
  }
View Full Code Here

   * @return void
   */
  private void processDatabase(SourcePojo source) throws ClassNotFoundException, SQLException, IOException
  {
    // Set up properties and RDBMS Manager
    properties = new PropertiesManager();
    rdbms = new RdbmsManager();

    // Get the type of database to access from the source object
    String dt = source.getDatabaseConfig().getDatabaseType();
   
View Full Code Here

      catch (Exception e){}
    }
   
    // DEFAULT CONFIGURATION
   
    PropertiesManager properties = new PropertiesManager();
   
    // 2] SENTIMENT
   
    try {
      if (null == bSentimentEnabled) { // (ie not per source)
        bSentimentEnabled = properties.getExtractionCapabilityEnabled(getName(), "sentiment");     
      }
    }
    catch (Exception e) {}
   
    // 3] CONCEPTS
   
    try {
      if (null == bConceptsEnabled) { // (ie not per source)
        bConceptsEnabled = properties.getExtractionCapabilityEnabled(getName(), "concepts");     
      }
    }
    catch (Exception e) {}

    // 4] KEYWORD QUALITY
   
    try {
      if (null == bStrict) { // (ie not per source)
        bStrict = properties.getExtractionCapabilityEnabled(getName(), "strict");     
      }
    }
    catch (Exception e) {}
   
   
View Full Code Here

        TrustManagerManipulator.allowAllSSL();   
      }
      finally {}
    }
   
    PropertiesManager props = new PropertiesManager();
    String sTypes = props.getHarvesterTypes();
    if (overrideTypeSettings) { // (override API settings in test mode)
      sTypes = "Feed,File,Database,Logstash";
    }
    String sType[] = sTypes.split("\\s*,\\s*");

   
    // Add a harvester for each data type
    for (String s: sType) {
      if (s.equalsIgnoreCase("database")) {
        try {
          this.harvesters.add(new DatabaseHarvester());
        }
        catch (Exception e) {
          logger.error(s + " not supported: " + e.getMessage());
        }
        catch(NoClassDefFoundError e) {
          logger.error(s + " not supported: " + e.getMessage());
        }       
      }
      else if (s.equalsIgnoreCase("logstash")) {
        try {
          this.harvesters.add(new LogstashHarvester());
        }
        catch (Exception e) {
          logger.error(s + " not supported: " + e.getMessage());
        }
        catch(NoClassDefFoundError e) {
          logger.error(s + " not supported: " + e.getMessage());
        }               
      }
      else if (s.equalsIgnoreCase("file")) {

        // According to http://www.ryanchapin.com/fv-b-4-648/java-lang-OutOfMemoryError--unable-to-create-new-native-thread-Exception-When-Using-SmbFileInputStream.html
        // this is needed to avoid java.lang.OutOfMemoryError (intermittent - for me at least, it's happened for exactly 1 source, but consistently when it does)
        System.setProperty("jcifs.resolveOrder", "DNS");
        System.setProperty("jcifs.smb.client.dfs.disabled", "true");

        try {
          this.harvesters.add(new FileHarvester());
        }
        catch (Exception e) {
          logger.error(s + " not supported: " + e.getMessage());
        }
        catch(NoClassDefFoundError e) {
          logger.error(s + " not supported: " + e.getMessage());
        }       
      }
      else if (s.equalsIgnoreCase("feed")) {
        try {
          this.harvesters.add(new FeedHarvester());
        }
        catch (Exception e) {
          logger.error(s + " not supported: " + e.getMessage());
        }
        catch(NoClassDefFoundError e) {
          logger.error(s + " not supported: " + e.getMessage());
        }       
      }
    }

    // Load all the extractors, set up defaults
    entity_extractor_mappings = new HashMap<String, IEntityExtractor>();
    text_extractor_mappings = new HashMap<String, ITextExtractor>();

    // Load custom text/entity extractors
    synchronized (HarvestController.class) {
      if (null == customExtractors) {
        customExtractors = new HashMap<String, Class>();
        customExtractorClassLoader = HarvestController.class.getClassLoader();
      }
      // Text extractors:
      String customTextList = props.getCustomTextExtractors();
      if (null != customTextList) {
        String customTextArray[] = customTextList.split("\\s*,\\s*");
        for (String customText: customTextArray) {
          if (!customExtractors.containsKey(customText)) {
            // (else already have this extractor)
            try {
              Class customTextExtractor = customExtractorClassLoader.loadClass(customText);
              ITextExtractor obj = (ITextExtractor)customTextExtractor.newInstance();
              text_extractor_mappings.put(obj.getName().toLowerCase(), obj);
              customExtractors.put(customText, customTextExtractor);
            }
            catch (Exception e) {
              logger.error("ITextExtractor: Couldn't load " + customText +": " + e.getMessage(), e);
            }
            catch(NoClassDefFoundError e) {
              logger.error("ITextExtractor: Couldn't load " + customText +": " + e.getMessage(), e);
            }       
          }       
          else { // Already loaded, put in again
            try {
              Class customTextExtractor = customExtractors.get(customText);
              ITextExtractor obj = (ITextExtractor)customTextExtractor.newInstance();
              text_extractor_mappings.put(obj.getName().toLowerCase(), obj);           
            }
            catch (Exception e) {
              logger.error("ITextExtractor: Couldn't use already loaded " + customText +": " + e.getMessage(), e);
            }
            catch(NoClassDefFoundError e) {
              logger.error("ITextExtractor: Couldn't use already loaded " + customText +": " + e.getMessage(), e);
            }       
          }
        }
      }//TESTED
      // Entity extractors
      String customEntityList = props.getCustomEntityExtractors();
      if (null != customEntityList) {
        String customEntityArray[] = customEntityList.split("\\s*,\\s*");
        for (String customEntity: customEntityArray) {
          if (!customExtractors.containsKey(customEntity)) {
            // (else already have this extractor - but may have it for text, so some work to do)
            try {
              Class customEntityExtractor = customExtractorClassLoader.loadClass(customEntity);
              IEntityExtractor obj = (IEntityExtractor)customEntityExtractor.newInstance();
              entity_extractor_mappings.put(obj.getName().toLowerCase(), obj);
              customExtractors.put(customEntity, customEntityExtractor);
            }
            catch (Exception e) {
              logger.error("IEntityExtractor: Couldn't load " + customEntity +": " + e.getMessage(), e);
            }
            catch(NoClassDefFoundError e) {
              logger.error("IEntityExtractor: Couldn't load " + customEntity +": " + e.getMessage(), e);
            }       
          }
          else { // If this object exists and if it's a text extractor, then see if it's also an entity extractor
            try {
              Class customEntityExtractor = customExtractors.get(customEntity);           
              IEntityExtractor obj = (IEntityExtractor)customEntityExtractor.newInstance();
              entity_extractor_mappings.put(obj.getName(), obj);
            }
            catch (Exception e) {
              logger.error("IEntityExtractor: Couldn't use already loaded " + customEntity +": " + e.getMessage(), e);           
            }
            catch(NoClassDefFoundError e) {
              logger.error("IEntityExtractor: Couldn't use already loaded " + customEntity +": " + e.getMessage(), e);           
            }       
          }
        }
      }//TESTED
    }

    try {
      entity_extractor_mappings.put("opencalais", new ExtractorOpenCalais());
    }
    catch (Exception e) {
      logger.warn("Can't use OpenCalais as entity extractor: " + e.getMessage());     
    }
    try {
      entity_extractor_mappings.put("textrank", new TextRankExtractor());
    }
    catch (Exception e) {
      logger.warn("Can't use textrank as entity extractor: " + e.getMessage());     
    }

    try {
      ExtractorAlchemyAPI both = new ExtractorAlchemyAPI();
      entity_extractor_mappings.put("alchemyapi", both);
      text_extractor_mappings.put("alchemyapi", both);
      ExtractorAlchemyAPI_Metadata both_metadata = new ExtractorAlchemyAPI_Metadata();
      entity_extractor_mappings.put("alchemyapi-metadata", both_metadata);
      text_extractor_mappings.put("alchemyapi-metadata", both_metadata);       
    }
    catch (Exception e) {
      logger.warn("Can't use AlchemyAPI as entity/text extractor: " + e.getMessage());     
    }
    try {
      text_extractor_mappings.put("boilerpipe", new TextExtractorBoilerpipe());
    }
    catch (Exception e) {
      logger.warn("Can't use Boilerpipe as text extractor: " + e.getMessage());     
    }
    try {
      text_extractor_mappings.put("tika", new TextExtractorTika());
    }
    catch (Exception e) {
      logger.warn("Can't use Tika as text extractor: " + e.getMessage());     
    }

    if (null != pm.getDefaultEntityExtractor()) {
      default_entity_extractor = entity_extractor_mappings.get(pm.getDefaultEntityExtractor().toLowerCase());
    }
    else {
      default_entity_extractor = null;
    }
    if (null != pm.getDefaultTextExtractor()) {
      default_text_extractor = text_extractor_mappings.get(pm.getDefaultTextExtractor().toLowerCase());
    }
    else {
      try {
        default_text_extractor = new TextExtractorBoilerpipe();     
      }
      catch (Exception e) {
        logger.warn("Can't use BoilerPlate as default text extractor: " + e.getMessage());
      }
    }
    nBetweenFeedDocs_ms = props.getWebCrawlWaitTime();
   
    // Set up security manager - basically always needed so might as well create here
   
    _securityManager = new IkanowSecurityManager();               
  }
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.harvest.utils.PropertiesManager

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.