Examples of Configurable


Examples of org.apache.hadoop.conf.Configurable

   *
   * @param params
   *          The Job parameters containing the gramSize, input output folders, defaultCat, encoding
   */
  public static void runJob(Parameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(BayesClassifierDriver.class);
    conf.setJobName("Bayes Classifier Driver running over input: " + params.get("testDirPath"));
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
   
    FileInputFormat.setInputPaths(conf, new Path(params.get("testDirPath")));
    Path outPath = new Path(params.get("testDirPath") + "-output");
    FileOutputFormat.setOutputPath(conf, outPath);
   
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setMapperClass(BayesClassifierMapper.class);
    conf.setCombinerClass(BayesClassifierReducer.class);
    conf.setReducerClass(BayesClassifierReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
   
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
    conf.set("bayes.parameters", params.toString());
   
    client.setConf(conf);
    JobClient.runJob(conf);
   
    Path outputFiles = new Path(outPath.toString() + "/part*");
    ConfusionMatrix matrix = readResult(dfs, outputFiles, conf, params);
    log.info("{}", matrix.summarize());
View Full Code Here

Examples of org.apache.hadoop.conf.Configurable

   * @param output
   *          the output pathname String
   */
  @Override
  public void runJob(String input, String output, BayesParameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class);
    conf.setJobName("Complementary Bayes Theta Normalizer Driver running over input: " + input);
   
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-weights/Sigma_j"));
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output + "/trainer-thetaNormalizer");
    FileOutputFormat.setOutputPath(conf, outPath);
    // conf.setNumMapTasks(100);
    // conf.setNumReduceTasks(1);
    conf.setMapperClass(CBayesThetaNormalizerMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(CBayesThetaNormalizerReducer.class);
    conf.setReducerClass(CBayesThetaNormalizerReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf
        .set("io.serializations",
          "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    // Dont ever forget this. People should keep track of how hadoop conf
    // parameters and make or break a piece of code
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    Path sigmaKFiles = new Path(output + "/trainer-weights/Sigma_k/*");
    Map<String,Double> labelWeightSum = SequenceFileModelReader.readLabelSums(dfs, sigmaKFiles, conf);
    DefaultStringifier<Map<String,Double>> mapStringifier = new DefaultStringifier<Map<String,Double>>(conf,
        GenericsUtil.getClass(labelWeightSum));
    String labelWeightSumString = mapStringifier.toString(labelWeightSum);
   
    log.info("Sigma_k for Each Label");
    Map<String,Double> c = mapStringifier.fromString(labelWeightSumString);
    log.info("{}", c);
    conf.set("cnaivebayes.sigma_k", labelWeightSumString);
   
    Path sigmaKSigmaJFile = new Path(output + "/trainer-weights/Sigma_kSigma_j/*");
    double sigmaJSigmaK = SequenceFileModelReader.readSigmaJSigmaK(dfs, sigmaKSigmaJFile, conf);
    DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
    String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK);
   
    log.info("Sigma_kSigma_j for each Label and for each Features");
    double retSigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString);
    log.info("{}", retSigmaJSigmaK);
    conf.set("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString);
   
    Path vocabCountFile = new Path(output + "/trainer-tfIdf/trainer-vocabCount/*");
    double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf);
    String vocabCountString = stringifier.toString(vocabCount);
   
    log.info("Vocabulary Count");
    conf.set("cnaivebayes.vocabCount", vocabCountString);
    double retvocabCount = stringifier.fromString(vocabCountString);
    log.info("{}", retvocabCount);
    conf.set("bayes.parameters", params.toString());
    conf.set("output.table", output);
    client.setConf(conf);
   
    JobClient.runJob(conf);
   
  }
View Full Code Here

Examples of org.apache.hadoop.conf.Configurable

                                         int maxDFPercent,
                                         Path dictionaryFilePath,
                                         Path output,
                                         boolean sequentialAccess) throws IOException {
   
    Configurable client = new JobClient();
    JobConf conf = new JobConf(TFIDFConverter.class);
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
    // this conf parameter needs to be set to enable serialisation of conf values
   
    conf.setJobName(": MakePartialVectors: input-folder: " + input + ", dictionary-file: "
                    + dictionaryFilePath.toString());
    conf.setLong(FEATURE_COUNT, featureCount);
    conf.setLong(VECTOR_COUNT, vectorCount);
    conf.setInt(MIN_DF, minDf);
    conf.setInt(MAX_DF_PERCENTAGE, maxDFPercent);
    conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(VectorWritable.class);
    DistributedCache.setCacheFiles(new URI[] {dictionaryFilePath.toUri()}, conf);
    FileInputFormat.setInputPaths(conf, new Path(input));
   
    FileOutputFormat.setOutputPath(conf, output);
   
    conf.setMapperClass(IdentityMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setReducerClass(TFIDFPartialVectorReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    FileSystem dfs = FileSystem.get(output.toUri(), conf);
    if (dfs.exists(output)) {
      dfs.delete(output, true);
    }
   
    client.setConf(conf);
    JobClient.runJob(conf);
  }
View Full Code Here

Examples of org.apache.hadoop.conf.Configurable

   * Count the document frequencies of features in parallel using Map/Reduce. The input documents have to be
   * in {@link SequenceFile} format
   */
  private static void startDFCounting(Path input, Path output) throws IOException {
   
    Configurable client = new JobClient();
    JobConf conf = new JobConf(TFIDFConverter.class);
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
    // this conf parameter needs to be set enable serialisation of conf values
   
    conf.setJobName("VectorTfIdf Document Frequency Count running over input: " + input.toString());
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(LongWritable.class);
   
    FileInputFormat.setInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, output);
   
    conf.setMapperClass(TermDocumentCountMapper.class);
   
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(TermDocumentCountReducer.class);
    conf.setReducerClass(TermDocumentCountReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
   
    FileSystem dfs = FileSystem.get(output.toUri(), conf);
    if (dfs.exists(output)) {
      dfs.delete(output, true);
    }
    client.setConf(conf);
    JobClient.runJob(conf);
  }
View Full Code Here

Examples of org.apache.hadoop.conf.Configurable

                                         boolean sequentialAccess) throws IOException {
    if (normPower != NO_NORMALIZING && normPower < 0) {
      throw new IllegalArgumentException("normPower must either be -1 or >= 0");
    }
   
    Configurable client = new JobClient();
    JobConf conf = new JobConf(PartialVectorMerger.class);
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.setJobName("PartialVectorMerger::MergePartialVectors");
    conf.setBoolean(SEQUENTIAL_ACCESS, sequentialAccess);
    conf.setInt(DIMENSION, dimension);
    conf.setFloat(NORMALIZATION_POWER, normPower);
   
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(VectorWritable.class);
   
    FileInputFormat.setInputPaths(conf, getCommaSeparatedPaths(partialVectorPaths));
   
    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(conf, outputPath);
   
    conf.setMapperClass(IdentityMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setReducerClass(PartialVectorMergeReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
   
    FileSystem dfs = FileSystem.get(outputPath.toUri(), conf);
    if (dfs.exists(outputPath)) {
      dfs.delete(outputPath, true);
    }
   
    client.setConf(conf);
    JobClient.runJob(conf);
  }
View Full Code Here

Examples of org.apache.hadoop.conf.Configurable

    }
  }

  public static <T> T configure(T t) {
    if (t instanceof Configurable) {
      Configurable configurable = (Configurable) t;
      configurable.setConf(new Configuration());
    }
    return t;
  }
View Full Code Here

Examples of org.apache.hadoop.conf.Configurable

  protected CompressionCodec getInstance(String compressionClass) throws IOException {
    try {
      CompressionCodec codec = (CompressionCodec) Class.forName(compressionClass).newInstance();
      if (codec instanceof Configurable) {
        Configurable configurable = (Configurable) codec;
        configurable.setConf(_configuration);
      }
      return codec;
    } catch (Exception e) {
      throw new IOException(e);
    }
View Full Code Here

Examples of org.apache.hadoop.conf.Configurable

                            String measureClassName,
                            double t1,
                            double t2,
                            double convergenceDelta) {
   
    Configurable client = new JobClient();
    JobConf conf = new JobConf(MeanShiftCanopyDriver.class);
   
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(MeanShiftCanopy.class);
    conf.setJobName("MeanShiftCanopyCluster");
   
    FileInputFormat.setInputPaths(conf, input);
    Path outPath = new Path(output);
    Path controlPath = new Path(input.getParent(), UUID.randomUUID().toString());
    FileOutputFormat.setOutputPath(conf, outPath);
    conf.setMapperClass(MeanShiftCanopyMapper.class);
    conf.setReducerClass(MeanShiftCanopyReducer.class);
    conf.setNumReduceTasks(1);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setBoolean("mapred.output.compress", true);
    conf.setClass("mapred.output.compression.codec", GzipCodec.class,  CompressionCodec.class);
    conf.set(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
    conf.set(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY, String.valueOf(convergenceDelta));
    conf.set(MeanShiftCanopyConfigKeys.T1_KEY, String.valueOf(t1));
    conf.set(MeanShiftCanopyConfigKeys.T2_KEY, String.valueOf(t2));
    conf.set(MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY, controlPath.toString());
    client.setConf(conf);
    try {
      JobClient.runJob(conf);
      input.getFileSystem(conf).delete(input.getParent(), true);
    } catch (IOException e) {
      LOG.warn(e.toString(), e);
View Full Code Here

Examples of org.apache.hadoop.conf.Configurable

   * @param output
   *          the output pathname String
   */
  public static Path createCanopyFromVectors(String input) {
   
    Configurable client = new JobClient();
    JobConf conf = new JobConf(MeanShiftCanopyDriver.class);
    conf.setJobName("CreateCanopyFromVectors");
   
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(MeanShiftCanopy.class);
   
    FileInputFormat.setInputPaths(conf, new Path(input));
    Path tmpDir = new Path(new Path(conf.get("hadoop.tmp.dir")), UUID.randomUUID().toString());
    Path outPath = new Path(tmpDir, UUID.randomUUID().toString());
    FileOutputFormat.setOutputPath(conf, outPath);
   
    conf.setMapperClass(MeanShiftCanopyCreatorMapper.class);
    conf.setNumReduceTasks(0);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
   
    client.setConf(conf);
    try {
      JobClient.runJob(conf);
      return outPath;
    } catch (IOException e) {
      LOG.warn(e.toString(), e);
View Full Code Here

Examples of org.apache.myfaces.tobago.config.Configurable

  /**
   * Tests TOBAGO-1134
   */
  public Measure getValue() {

    Configurable c = new Configurable() {
      public String getRendererType() {
        return "Test";
      }

      public Markup getCurrentMarkup() {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.