Package org.apache.mahout.common

Examples of org.apache.mahout.common.StringTuple


    List<String> oValue = new ArrayList<String>();
    for (int selectedField : selectedFields) {
      oValue.add(fields[selectedField]);
    }
   
    context.write(new Text(oKey.toString()), new StringTuple(oValue));
   
  }
View Full Code Here


    Text key = new Text();
    key.set("dummy-key");
   
    String[] input = {"the", "best", "of", "times", "the", "worst", "of",
    "times"};
    StringTuple inputTuple = new StringTuple();
    for (String i : input) {
      inputTuple.add(i);
    }
   
    String[][] values = { {"h_the", "the best"},
                          {"t_best", "the best"},
                          {"h_of", "of times"},
View Full Code Here

    Text key = new Text();
    key.set("dummy-key");
   
    String[] input = {"the", "best", "of", "times", "the", "worst", "of",
    "times"};
    StringTuple inputTuple = new StringTuple();
    for (String i : input) {
      inputTuple.add(i);
    }
   
    String[][] values = {{"h_the", "the best"},
                                         {"t_best", "the best"},
                                         {"h_of", "of times"},
View Full Code Here

  protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    TokenStream stream = analyzer.tokenStream(key.toString(), new StringReader(value.toString()));
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    StringTuple document = new StringTuple();
    while (stream.incrementToken()) {
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    stream.end();
    Closeables.close(stream, true);
    context.write(key, document);
View Full Code Here

    }
   
    for (NamedVector seedVector : seedVectors) {
      double distance = measure.distance(seedVector, valVec);
      if (!usesThreshold || distance <= maxDistance) {
        StringTuple outKey = new StringTuple();
        outKey.add(seedVector.getName());
        outKey.add(keyName);
        context.write(outKey, new DoubleWritable(distance));
      }
    }
  }
View Full Code Here

  private static final VarIntWritable ONE = new VarIntWritable(1);

  @Override
  protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    StringTuple tuple = new StringTuple(key.toString());
    tuple.add(value.toString());
    context.write(tuple, ONE);
  }
View Full Code Here

    throws IOException, InterruptedException {
    Iterator<StringTuple> it = values.iterator();
    if (!it.hasNext()) {
      return;
    }
    StringTuple value = it.next();

    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size

    if (maxNGramSize >= 2) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize);
      sf.reset();
      try {
        do {
          String term = sf.getAttribute(CharTermAttribute.class).toString();
          if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram
            int termId = dictionary.get(term);
            vector.setQuick(termId, vector.getQuick(termId) + 1);
          }
        } while (sf.incrementToken());

        sf.end();
      } finally {
        Closeables.close(sf, true);
      }
    } else {
      for (String term : value.getEntries()) {
        if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram
          int termId = dictionary.get(term);
          vector.setQuick(termId, vector.getQuick(termId) + 1);
        }
      }
View Full Code Here

    Text key = new Text();
    key.set("dummy-key");
   
    String[] input = {"the", "best", "of", "times", "the", "worst", "of",
    "times"};
    StringTuple inputTuple = new StringTuple();
    for (String i : input) {
      inputTuple.add(i);
    }
   
    String[][] values = { {"h_the", "the best"},
                          {"t_best", "the best"},
                          {"h_of", "of times"},
View Full Code Here

    Text key = new Text();
    key.set("dummy-key");
   
    String[] input = {"the", "best", "of", "times", "the", "worst", "of",
    "times"};
    StringTuple inputTuple = new StringTuple();
    for (String i : input) {
      inputTuple.add(i);
    }
   
    String[][] values = {{"h_the", "the best"},
                                         {"t_best", "the best"},
                                         {"h_of", "of times"},
View Full Code Here

  @Test
  public void testVectorDistanceMapper() throws Exception {
    Mapper<WritableComparable<?>, VectorWritable, StringTuple, DoubleWritable>.Context context =
            EasyMock.createMock(Mapper.Context.class);
    StringTuple tuple = new StringTuple();
    tuple.add("foo");
    tuple.add("123");
    context.write(tuple, new DoubleWritable(Math.sqrt(2.0)));
    tuple = new StringTuple();
    tuple.add("foo2");
    tuple.add("123");
    context.write(tuple, new DoubleWritable(1));

    EasyMock.replay(context);

    Vector vector = new RandomAccessSparseVector(2);
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.StringTuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.