Package edu.umd.cloud9.io.array

Examples of edu.umd.cloud9.io.array.ArrayListOfFloatsWritable


          "-output_path=" + indexRootPath + "/wiki-docid-tmp",
          "-output_file=" + mappingFile.toString(),
          "-wiki_language=" + collectionLang };
      LOG.info("Running BuildWikipediaDocnoMapping with args " + Arrays.toString(arr));

      BuildWikipediaDocnoMapping tool = new BuildWikipediaDocnoMapping();
      tool.setConf(conf);
      tool.run(arr);

      fs.delete(new Path(indexRootPath + "/wiki-docid-tmp"), true);
    } else {
      LOG.info("Docno mapping already exists at: " + mappingFile);
    }

    // Repack Wikipedia into sequential compressed block
    if (!fs.exists(new Path(seqCollection + "/part-00000"))) {
      LOG.info(seqCollection + " doesn't exist, creating...");
      String[] arr = new String[] { "-input=" + rawCollection,
          "-output=" + seqCollection,
          "-mapping_file=" + mappingFile.toString(),
          "-compression_type=block",
          "-wiki_language=" + collectionLang };
      LOG.info("Running RepackWikipedia with args " + Arrays.toString(arr));

      RepackWikipedia tool = new RepackWikipedia();
      tool.setConf(conf);
      tool.run(arr);
    } else {
      LOG.info("Repacked collection already exists at: " + seqCollection);     
    }

    conf.set(Constants.CollectionName, "Wikipedia-"+collectionLang);
View Full Code Here


        "-mapping_file=" + mappingFile.toString(),
        "-compression_type=block",
        "-wiki_language=" + collectionLang };
    LOG.info("Running RepackWikipedia with args " + Arrays.toString(arr));

    RepackWikipedia tool = new RepackWikipedia();
    tool.setConf(conf);
    tool.run(arr);

    conf.set(Constants.CollectionName, "Wikipedia-"+collectionLang);
    conf.setInt(Constants.NumMapTasks, numMappers);
    conf.setInt(Constants.NumReduceTasks, numReducers);
    conf.set(Constants.CollectionPath, seqCollection);
View Full Code Here

    // Repack Wikipedia into sequential compressed block
    p = new Path(seqCollection);
    if (!fs.exists(p)) {
      LOG.info(seqCollection + " doesn't exist, creating...");
      String[] arr = new String[] { rawCollection, seqCollection, mappingFile.toString(), "block"};
      RepackWikipedia tool = new RepackWikipedia();
      tool.setConf(conf);
      tool.run(arr);
    }

    conf.set("Ivory.CollectionName", "Wikipedia-"+collectionLang);
    conf.setInt("Ivory.NumMapTasks", numMappers);
    conf.setInt("Ivory.NumReduceTasks", numReducers);
View Full Code Here

          "-mapping_file=" + mappingFile.toString(),
          "-compression_type=block",
          "-wiki_language=" + collectionLang };
      LOG.info("Running RepackWikipedia with args " + Arrays.toString(arr));

      RepackWikipedia tool = new RepackWikipedia();
      tool.setConf(conf);
      tool.run(arr);
    } else {
      LOG.info("Repacked collection already exists at: " + seqCollection);     
    }

    conf.set(Constants.CollectionName, "Wikipedia-"+collectionLang);
View Full Code Here

        if (fileStats[i].getPath().getName().startsWith("_")) {
          continue;
        }

        LOG.info("processing " + fileStats[i].getPath());
        FSLineReader reader = new FSLineReader(fileStats[i].getPath(), fs);

        Text line = new Text();
        while (reader.readLine(line) > 0) {
          String[] arr = line.toString().split("\\t+", 2);

          int docno = Integer.parseInt(arr[0]);
          int len = Integer.parseInt(arr[1]);

          // Note that because of speculative execution there may be
          // multiple copies of doclength data. Therefore, we can't
          // just count number of doclengths read. Instead, keep track
          // of largest docno encountered.
          if (docno < docnoOffset) {
            throw new RuntimeException(
                "Error: docno " + docno + " < docnoOffset " + docnoOffset + "!");
          }

          doclengths[docno - docnoOffset] = len;

          if (docno > maxDocno) {
            maxDocno = docno;
          }
          if (docno < minDocno) {
            minDocno = docno;
          }
        }
        reader.close();
        context.getCounter(DocLengths.Files).increment(1);
      }

      LOG.info("min docno: " + minDocno);
      LOG.info("max docno: " + maxDocno);
View Full Code Here

     *  Date:         8/20/04
     *  Compilation:  javac StdGaussian.java
     *  Execution:    java StdGaussian 
     **************************************************************************/
    double r, x, y;
    ArrayListOfFloatsWritable vector = new ArrayListOfFloatsWritable(numSamples);
    vector.setSize(numSamples);

    double normalizationFactor = 0;
    for(int i=0;i<numSamples;i++){

      // find a uniform random point (x, y) inside unit circle
      do {
        x = 2.0 * Math.random() - 1.0;
        y = 2.0 * Math.random() - 1.0;
        r = x*x + y*y;
      } while (r > 1 || r == 0);      // loop executed 4 / pi = 1.273.. times on average
      // http://en.wikipedia.org/wiki/Box-Muller_transform

      // apply the Box-Muller formula to get standard Gaussian z   
      double f = (x * Math.sqrt(-2.0 * Math.log(r) / r));
      normalizationFactor+=Math.pow(f, 2.0);
      vector.set(i,(float) f);
    }

    /*normalize vector*/
    normalizationFactor=Math.sqrt(normalizationFactor);
    for(int i=0;i<vector.size();i++){
      float val = vector.get(i);
      float newf = (float) (val/normalizationFactor);
      vector.set(i, newf);
    }
    return vector;
  }
View Full Code Here

    return vector;
  }
 
  public static FloatAsBytesWritable generateUnitRandomVectorAsBytes(int numSamples) {
    double r, x, y;
    ArrayListOfFloatsWritable vector = new ArrayListOfFloatsWritable(numSamples);
    vector.setSize(numSamples);
   
    byte[] bytes = new byte[numSamples];
    float max=Float.MIN_VALUE;
    float min=Float.MAX_VALUE;
   
    for(int i=0;i<numSamples;i++){

      // find a uniform random point (x, y) inside unit circle
      do {
        x = 2.0 * Math.random() - 1.0;
        y = 2.0 * Math.random() - 1.0;
        r = x*x + y*y;
      } while (r > 1 || r == 0);      // loop executed 4 / pi = 1.273.. times on average
      // http://en.wikipedia.org/wiki/Box-Muller_transform

      // apply the Box-Muller formula to get standard Gaussian z   
      float f = (float) (x * Math.sqrt(-2.0 * Math.log(r) / r));
      vector.set(i, f);
      if(f>0 && f>max){
        max=f;
      }else if(f<0 && f<min){
        min=f;
      }
     
    }

//    System.out.println(max);
//    System.out.println(min);

    /*normalize vector*/
    for(int i=0;i<vector.size();i++){
      float val = vector.get(i);
      float normalized2one=0.0f;
      //map values to [-1,1] range
      if(val>0){
        normalized2one = val/max;
      }else if(val<0){
View Full Code Here

    /*************************************************************************
     * Author: Kevin Wayne Date: 8/20/04 Compilation: javac StdGaussian.java Execution: java
     * StdGaussian
     **************************************************************************/
    double r, x, y;
    ArrayListOfFloatsWritable vector = new ArrayListOfFloatsWritable(numSamples);
    vector.setSize(numSamples);

    double normalizationFactor = 0;
    for (int i = 0; i < numSamples; i++) {

      // find a uniform random point (x, y) inside unit circle
      do {
        x = 2.0 * Math.random() - 1.0;
        y = 2.0 * Math.random() - 1.0;
        r = x * x + y * y;
      } while (r > 1 || r == 0); // loop executed 4 / pi = 1.273.. times on average
      // http://en.wikipedia.org/wiki/Box-Muller_transform

      // apply the Box-Muller formula to get standard Gaussian z
      double f = (x * Math.sqrt(-2.0 * Math.log(r) / r));
      normalizationFactor += Math.pow(f, 2.0);
      vector.set(i, (float) f);
    }

    /* normalize vector */
    normalizationFactor = Math.sqrt(normalizationFactor);
    for (int i = 0; i < vector.size(); i++) {
      float val = vector.get(i);
      float newf = (float) (val / normalizationFactor);
      vector.set(i, newf);
    }
    return vector;
  }
View Full Code Here

    return vector;
  }

  public static FloatAsBytesWritable generateUnitRandomVectorAsBytes(int numSamples) {
    double r, x, y;
    ArrayListOfFloatsWritable vector = new ArrayListOfFloatsWritable(numSamples);
    vector.setSize(numSamples);

    byte[] bytes = new byte[numSamples];
    float max = Float.MIN_VALUE;
    float min = Float.MAX_VALUE;

    for (int i = 0; i < numSamples; i++) {

      // find a uniform random point (x, y) inside unit circle
      do {
        x = 2.0 * Math.random() - 1.0;
        y = 2.0 * Math.random() - 1.0;
        r = x * x + y * y;
      } while (r > 1 || r == 0); // loop executed 4 / pi = 1.273.. times on average
      // http://en.wikipedia.org/wiki/Box-Muller_transform

      // apply the Box-Muller formula to get standard Gaussian z
      float f = (float) (x * Math.sqrt(-2.0 * Math.log(r) / r));
      vector.set(i, f);
      if (f > 0 && f > max) {
        max = f;
      } else if (f < 0 && f < min) {
        min = f;
      }

    }

    // System.out.println(max);
    // System.out.println(min);

    /* normalize vector */
    for (int i = 0; i < vector.size(); i++) {
      float val = vector.get(i);
      float normalized2one = 0.0f;
      // map values to [-1,1] range
      if (val > 0) {
        normalized2one = val / max;
      } else if (val < 0) {
View Full Code Here

    /*************************************************************************
     * Author: Kevin Wayne Date: 8/20/04 Compilation: javac StdGaussian.java Execution: java
     * StdGaussian
     **************************************************************************/
    double r, x, y;
    ArrayListOfFloatsWritable vector = new ArrayListOfFloatsWritable(numSamples);
    vector.setSize(numSamples);

    double normalizationFactor = 0;
    for (int i = 0; i < numSamples; i++) {

      // find a uniform random point (x, y) inside unit circle
      do {
        x = 2.0 * Math.random() - 1.0;
        y = 2.0 * Math.random() - 1.0;
        r = x * x + y * y;
      } while (r > 1 || r == 0); // loop executed 4 / pi = 1.273.. times on average
      // http://en.wikipedia.org/wiki/Box-Muller_transform

      // apply the Box-Muller formula to get standard Gaussian z
      double f = (x * Math.sqrt(-2.0 * Math.log(r) / r));
      normalizationFactor += Math.pow(f, 2.0);
      vector.set(i, (float) f);
    }

    /* normalize vector */
    normalizationFactor = Math.sqrt(normalizationFactor);
    for (int i = 0; i < vector.size(); i++) {
      float val = vector.get(i);
      float newf = (float) (val / normalizationFactor);
      vector.set(i, newf);
    }
    return vector;
  }
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.io.array.ArrayListOfFloatsWritable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.