Package org.apache.hadoop.io

Examples of org.apache.hadoop.io.DoubleWritable$Comparator


    assertEquals(md5HashKey1, md5HashKey2);
  }

  @Test
  public void testMD5HashForDoubleWritableKey() throws IOException {
    DoubleWritable key = new DoubleWritable(123d);
    MD5Hash md5HashKey1 = HashUtility.getMD5Hash(key);
    MD5Hash md5HashKey2 = HashUtility.getMD5Hash(key);
    assertEquals(md5HashKey1, md5HashKey2);
  }
View Full Code Here


  }
 
  @Test
  public void testMergeByCustomObjectKeyWithSequenceFileInputFormat() throws Exception {   
    Student student1 = setStudent(new Text("Sam"),new Text("US"),new IntWritable(1),
        new LongWritable(9999999998l),new DoubleWritable(99.12));       
    Student student2 = setStudent(new Text("John"),new Text("AUS"),new IntWritable(2),
        new LongWritable(9999999999l),new DoubleWritable(90.12));       
    Student student3 = setStudent(new Text("Mary"),new Text("UK"),new IntWritable(3),
        new LongWritable(9999999988l),new DoubleWritable(69.12));   
    Student student4 = setStudent(new Text("Kelvin"),new Text("UK"),new IntWritable(4),
        new LongWritable(9999998888l),new DoubleWritable(59.12));
 
    HashMap<Student, Text> inputData1 = new HashMap<Student, Text>();
    inputData1.put(student1, new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(student2, new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(student3, new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
View Full Code Here

 
 
  @Test
  public void testDedupByCustomObjectKeyWithSequenceFileInputFormat() throws Exception {   
    Student student1 = setStudent(new Text("Sam"),new Text("US"),new IntWritable(1),
        new LongWritable(9999999998l),new DoubleWritable(99.12));       
    Student student2 = setStudent(new Text("John"),new Text("AUS"),new IntWritable(2),
        new LongWritable(9999999999l),new DoubleWritable(90.12));       
    Student student3 = setStudent(new Text("Mary"),new Text("UK"),new IntWritable(3),
        new LongWritable(9999999988l),new DoubleWritable(69.12));   
    Student student4 = setStudent(new Text("Kelvin"),new Text("UK"),new IntWritable(4),
        new LongWritable(9999998888l),new DoubleWritable(59.12));
 
    HashMap<Student, Text> inputData1 = new HashMap<Student, Text>();
    inputData1.put(student1, new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(student2, new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(student3, new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
View Full Code Here

    name.readFields(in);
    address = new Text();
    address.readFields(in);
    mobileNumber = new LongWritable();
    mobileNumber.readFields(in);
    percentage = new DoubleWritable();
    percentage.readFields(in);

  }
View Full Code Here

    map.put(new Text("two"), new VLongWritable(2));
    Writable[] writables = new Writable[] {
      new BytesWritable(new byte[] { 1, 2, 3, 4 }),
      new ByteWritable((byte) 123), new BooleanWritable(true),
      new VIntWritable(12345), new VLongWritable(123456789L),
      new FloatWritable((float) 1.2), new DoubleWritable(1.234),
      new Text("random string"),
      new ObjectWritable("test")
    };
    TypedBytesWritable tbw = new TypedBytesWritable();
    tbw.setValue("typed bytes text");
View Full Code Here

                  Reporter reporter) throws IOException {
    Object candidate = StringUtils.fromString(value.toString());
   
    double fitness = evaluator.getFitness(candidate, null);
   
    output.collect(key, new DoubleWritable(fitness));
  }
View Full Code Here

    Path output = new Path(outpath, "output.sorted");
    sorter.merge(outfiles, output);
   
    // import the evaluations
    LongWritable key = new LongWritable();
    DoubleWritable value = new DoubleWritable();
    Reader reader = new Reader(fs, output, conf);
    try {
      while (reader.next(key, value)) {
        evaluations.add(value.get());
      }
    } finally {
      reader.close();
    }
  }
View Full Code Here

   
    int[] gramFreq = new int[2];
    gramFreq[0] = gramFreq[1] = -1;
   
    if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
      DoubleWritable dd = new DoubleWritable(ngram.getFrequency());
      Text t = new Text(ngram.getString());
      output.collect(t, dd);
      return;
    }
    // FIXME: better way to handle errors? Wouldn't an exception thrown here
    // cause hadoop to re-try the job?
    String[] gram = new String[2];
    while (values.hasNext()) {
      Gram value = values.next();
     
      int pos = value.getType() == Gram.Type.HEAD ? 0 : 1;
     
      if (gramFreq[pos] != -1) {
        log.warn("Extra {} for {}, skipping", value.getType(), ngram);
        if (value.getType() == Gram.Type.HEAD) {
          reporter.incrCounter(Skipped.EXTRA_HEAD, 1);
        } else {
          reporter.incrCounter(Skipped.EXTRA_TAIL, 1);
        }
        return;
      }
     
      gram[pos] = value.getString();
      gramFreq[pos] = value.getFrequency();
    }
   
    if (gramFreq[0] == -1) {
      log.warn("Missing head for {}, skipping.", ngram);
      reporter.incrCounter(Skipped.MISSING_HEAD, 1);
      return;
    } else if (gramFreq[1] == -1) {
      log.warn("Missing tail for {}, skipping", ngram);
      reporter.incrCounter(Skipped.MISSING_TAIL, 1);
      return;
    }
   
    int k11 = ngram.getFrequency(); /* a&b */
    int k12 = gramFreq[0] - ngram.getFrequency(); /* a&!b */
    int k21 = gramFreq[1] - ngram.getFrequency(); /* !b&a */
    int k22 = (int) (ngramTotal - (gramFreq[0] + gramFreq[1] - ngram.getFrequency())); /* !a&!b */
   
    try {
      double llr = ll.logLikelihoodRatio(k11, k12, k21, k22);
      if (llr < minLLRValue) {
        reporter.incrCounter(Skipped.LESS_THAN_MIN_LLR, 1);
        return;
      }
      DoubleWritable dd = new DoubleWritable(llr);
      Text t = new Text(ngram.getString());
      output.collect(t, dd);
    } catch (IllegalArgumentException ex) {
      reporter.incrCounter(Skipped.LLR_CALCULATION_ERROR, 1);
      log.error("Problem calculating LLR ratio: " + ex.getMessage());
View Full Code Here

        bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(label), Bytes
            .toBytes(weightSumPerLabel));
        table.put(bu);
      }
    }
    output.collect(key, new DoubleWritable(weightSumPerLabel));
   
  }
View Full Code Here

    } else {
      CollocDriver.generateAllGrams(inputPath.toString(), dictionaryJobPath.toString(), maxNGramSize,
        minSupport, minLLRValue, numReducers);
      dictionaryChunks = createDictionaryChunks(minSupport, new Path(
          output + DICTIONARY_JOB_FOLDER, CollocDriver.NGRAM_OUTPUT_DIRECTORY), output,
        chunkSizeInMegabytes, new DoubleWritable(), maxTermDimension);
    }
   
    int partialVectorIndex = 0;
    List<Path> partialVectorPaths = new ArrayList<Path>();
    for (Path dictionaryChunk : dictionaryChunks) {
View Full Code Here

TOP

Related Classes of org.apache.hadoop.io.DoubleWritable$Comparator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.