Package org.apache.hadoop.conf

Examples of org.apache.hadoop.conf.Configurable
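Configurable is the interface Hadoop uses for objects that need a Configuration injected after construction; it declares exactly two methods, setConf(Configuration) and getConf(). A minimal sketch of an implementation (the class name ExampleConfigurable is made up for illustration):

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;

/** Minimal Configurable: simply stores the Configuration handed to it. */
public class ExampleConfigurable implements Configurable {

  private Configuration conf;

  @Override
  public void setConf(Configuration conf) {
    // Hadoop calls this automatically when the object is created through
    // ReflectionUtils.newInstance(clazz, conf).
    this.conf = conf;
  }

  @Override
  public Configuration getConf() {
    return conf;
  }
}

The examples below use the interface in two ways: the Mahout Bayes drivers assign a JobClient (which implements Configurable) to a Configurable reference and call setConf before running the job, and the SASL snippets at the end call setConf on a properties resolver obtained through reflection.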


/** Create and run the Bayes Trainer. */
public class BayesWeightSummerDriver implements BayesJob {

  @Override
  public void runJob(Path input, Path output, BayesParameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(BayesWeightSummerDriver.class);
    conf.setJobName("Bayes Weight Summer Driver running over input: " + input);
   
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
   
    FileInputFormat.addInputPath(conf, new Path(output, "trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output, "trainer-weights");
    FileOutputFormat.setOutputPath(conf, outPath);
    HadoopUtil.delete(conf, outPath);
    // conf.setNumReduceTasks(1);
    // conf.setNumMapTasks(100);
    conf.setMapperClass(BayesWeightSummerMapper.class);
    // See the javadoc for the file input format spec: the first token is the
    // key, the rest is the value, and each whole document is on one line.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(BayesWeightSummerReducer.class);
    conf.setReducerClass(BayesWeightSummerReducer.class);
    conf.setOutputFormat(BayesWeightSummerOutputFormat.class);

    conf.set("bayes.parameters", params.toString());
    conf.set("output.table", output.toString());
   
    client.setConf(conf);
   
    JobClient.runJob(conf);
  }
}


  /**
   * Run the Bayes classifier job over the test set named in the parameters.
   *
   * @param params
   *          the job parameters, containing the gramSize, input/output folders, defaultCat and encoding
   */
  public static void runJob(Parameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(BayesClassifierDriver.class);
    conf.setJobName("Bayes Classifier Driver running over input: " + params.get("testDirPath"));
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
   
    FileInputFormat.setInputPaths(conf, new Path(params.get("testDirPath")));
    Path outPath = new Path(params.get("testDirPath") + "-output");
    FileOutputFormat.setOutputPath(conf, outPath);
   
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setMapperClass(BayesClassifierMapper.class);
    conf.setCombinerClass(BayesClassifierReducer.class);
    conf.setReducerClass(BayesClassifierReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
   
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
   
    HadoopUtil.delete(conf, outPath);
    conf.set("bayes.parameters", params.toString());
   
    client.setConf(conf);
    JobClient.runJob(conf);
   
    Path outputFiles = new Path(outPath, "part*");
    ConfusionMatrix matrix = readResult(outputFiles, conf, params);
    log.info("{}", matrix);

 
  private static final Logger log = LoggerFactory.getLogger(BayesThetaNormalizerDriver.class);

  @Override
  public void runJob(Path input, Path output, BayesParameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(BayesThetaNormalizerDriver.class);
   
    conf.setJobName("Bayes Theta Normalizer Driver running over input: " + input);
   
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(conf, new Path(output, "trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output, "trainer-thetaNormalizer");
    FileOutputFormat.setOutputPath(conf, outPath);
    // conf.setNumMapTasks(100);
    // conf.setNumReduceTasks(1);
    conf.setMapperClass(BayesThetaNormalizerMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(BayesThetaNormalizerReducer.class);
    conf.setReducerClass(BayesThetaNormalizerReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this: keep track of which Hadoop conf
    // parameters can make or break a piece of code.
   
    HadoopUtil.delete(conf, outPath);

    Path sigmaKFiles = new Path(output, "trainer-weights/Sigma_k/*");
    Map<String,Double> labelWeightSum = SequenceFileModelReader.readLabelSums(sigmaKFiles, conf);
    DefaultStringifier<Map<String,Double>> mapStringifier = new DefaultStringifier<Map<String,Double>>(conf,
        GenericsUtil.getClass(labelWeightSum));
    String labelWeightSumString = mapStringifier.toString(labelWeightSum);
   
    log.info("Sigma_k for Each Label");
    Map<String,Double> c = mapStringifier.fromString(labelWeightSumString);
    log.info("{}", c);
    conf.set("cnaivebayes.sigma_k", labelWeightSumString);
   
    Path sigmaJSigmaKFile = new Path(output, "trainer-weights/Sigma_kSigma_j/*");
    double sigmaJSigmaK = SequenceFileModelReader.readSigmaJSigmaK(sigmaJSigmaKFile, conf);
    DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
    String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK);
   
    log.info("Sigma_kSigma_j for each Label and for each Features");
    double retSigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString);
    log.info("{}", retSigmaJSigmaK);
    conf.set("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString);
   
    Path vocabCountFile = new Path(output, "trainer-tfIdf/trainer-vocabCount/*");
    double vocabCount = SequenceFileModelReader.readVocabCount(vocabCountFile, conf);
    String vocabCountString = stringifier.toString(vocabCount);
   
    log.info("Vocabulary Count");
    conf.set("cnaivebayes.vocabCount", vocabCountString);
    double retvocabCount = stringifier.fromString(vocabCountString);
    log.info("{}", retvocabCount);
    conf.set("bayes.parameters", params.toString());
    conf.set("output.table", output.toString());
    client.setConf(conf);
   
    JobClient.runJob(conf);
   
  }
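The drivers above pass whole data structures to their mappers through the JobConf by encoding them with DefaultStringifier, which serializes through whatever io.serializations lists. A standalone round-trip sketch of that pattern; the property name bayes.example.sums and the map contents are made up for illustration:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.util.GenericsUtil;

public class StringifierRoundTrip {

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // DefaultStringifier serializes via the configured io.serializations, so
    // JavaSerialization must be enabled for plain Java objects like HashMap.
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");

    Map<String,Double> sums = new HashMap<String,Double>();
    sums.put("label_a", 42.0); // made-up value, for illustration only

    // Driver side: encode the map into a conf property.
    DefaultStringifier<Map<String,Double>> stringifier =
        new DefaultStringifier<Map<String,Double>>(conf, GenericsUtil.getClass(sums));
    conf.set("bayes.example.sums", stringifier.toString(sums));

    // Mapper side: decode the same map back out of the conf.
    Map<String,Double> decoded = stringifier.fromString(conf.get("bayes.example.sums"));
    System.out.println(decoded); // {label_a=42.0}
  }
}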

 
  private static final Logger log = LoggerFactory.getLogger(CBayesThetaNormalizerDriver.class);

  @Override
  public void runJob(Path input, Path output, BayesParameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class);
    conf.setJobName("Complementary Bayes Theta Normalizer Driver running over input: " + input);
   
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(conf, new Path(output, "trainer-weights/Sigma_j"));
    FileInputFormat.addInputPath(conf, new Path(output, "trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output, "trainer-thetaNormalizer");
    FileOutputFormat.setOutputPath(conf, outPath);
    // conf.setNumMapTasks(100);
    // conf.setNumReduceTasks(1);
    conf.setMapperClass(CBayesThetaNormalizerMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(CBayesThetaNormalizerReducer.class);
    conf.setReducerClass(CBayesThetaNormalizerReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set("io.serializations",
             "org.apache.hadoop.io.serializer.JavaSerialization,"
             + "org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this: keep track of which Hadoop conf
    // parameters can make or break a piece of code.
   
    HadoopUtil.delete(conf, outPath);
   
    Path sigmaKFiles = new Path(output, "trainer-weights/Sigma_k/*");
    Map<String,Double> labelWeightSum = SequenceFileModelReader.readLabelSums(sigmaKFiles, conf);
    DefaultStringifier<Map<String,Double>> mapStringifier = new DefaultStringifier<Map<String,Double>>(conf,
        GenericsUtil.getClass(labelWeightSum));
    String labelWeightSumString = mapStringifier.toString(labelWeightSum);
   
    log.info("Sigma_k for Each Label");
    Map<String,Double> c = mapStringifier.fromString(labelWeightSumString);
    log.info("{}", c);
    conf.set("cnaivebayes.sigma_k", labelWeightSumString);
   
    Path sigmaKSigmaJFile = new Path(output, "trainer-weights/Sigma_kSigma_j/*");
    double sigmaJSigmaK = SequenceFileModelReader.readSigmaJSigmaK(sigmaKSigmaJFile, conf);
    DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
    String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK);
   
    log.info("Sigma_kSigma_j for each Label and for each Features");
    double retSigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString);
    log.info("{}", retSigmaJSigmaK);
    conf.set("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString);
   
    Path vocabCountFile = new Path(output, "trainer-tfIdf/trainer-vocabCount/*");
    double vocabCount = SequenceFileModelReader.readVocabCount(vocabCountFile, conf);
    String vocabCountString = stringifier.toString(vocabCount);
   
    log.info("Vocabulary Count");
    conf.set("cnaivebayes.vocabCount", vocabCountString);
    double retvocabCount = stringifier.fromString(vocabCountString);
    log.info("{}", retvocabCount);
    conf.set("bayes.parameters", params.toString());
    conf.set("output.table", output.toString());
    client.setConf(conf);
   
    JobClient.runJob(conf);
   
  }

        throw new IllegalStateException("Error finding hadoop SASL properties", e);
      }
    }
    // Hadoop 2.5 and later way of finding the SASL properties
    try {
      Configurable saslPropertiesResolver = (Configurable) RES_GET_INSTANCE_METHOD.invoke(null,
          conf);
      saslPropertiesResolver.setConf(conf);
      return (Map<String, String>) GET_DEFAULT_PROP_METHOD.invoke(saslPropertiesResolver);
    } catch (Exception e) {
      throw new IllegalStateException("Error finding hadoop SASL properties", e);
    }
  }
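The reflective calls above (RES_GET_INSTANCE_METHOD, GET_DEFAULT_PROP_METHOD) exist so the class still compiles and runs against Hadoop versions that predate the resolver API. When a recent Hadoop is on the compile classpath, the same lookup can be written directly; a sketch assuming org.apache.hadoop.security.SaslPropertiesResolver is the class behind those Method handles (SaslPropsExample is a made-up wrapper):

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SaslPropertiesResolver;

public class SaslPropsExample {

  /** Resolve the default SASL properties without reflection. */
  static Map<String, String> getSaslProperties(Configuration conf) {
    // getInstance() instantiates the resolver class named in the conf and,
    // since SaslPropertiesResolver implements Configurable, hands it the
    // conf through setConf, so no explicit setConf call is needed here.
    SaslPropertiesResolver resolver = SaslPropertiesResolver.getInstance(conf);
    return resolver.getDefaultProperties();
  }
}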

      }
      else {
        // 2.4 and later way of finding the SASL_PROPS property, due to changes from HADOOP-10221 and HADOOP-10451
        Map<String, String> saslProps = new TreeMap<String, String>();
        try {
          Configurable saslPropertiesResolver = (Configurable) RES_GET_INSTANCE_METHOD.invoke(null,
              conf);
          saslPropertiesResolver.setConf(conf);
          saslProps = (Map<String, String>) GET_PROP_METHOD.invoke(saslPropertiesResolver, InetAddress.getLocalHost());
          transFactory.addServerDefinition(
              AuthMethod.KERBEROS.getMechanismName(),
              names[0], names[1], // two parts of the Kerberos principal
              saslProps,
