Package org.apache.mahout.utils.vectors

Examples of org.apache.mahout.utils.vectors.TermEntry


    vector = new RandomAccessSparseVector(termInfo.totalTerms(field));
    this.numTerms = numTerms;
  }
 
  public void map(BytesRef term, int frequency) {
    TermEntry entry = termInfo.getTermEntry(field, term.utf8ToString());
    if (entry != null) {
      vector.setQuick(entry.getTermIdx(), weight.calculate(frequency, entry.getDocFreq(), (int)numTerms, numDocs));
    }
  }
View Full Code Here


      // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
      // This modifies the termBitset, but that's fine as we are not using it anywhere else.
      termBitset.and(clusterDocBitset);
      int inclusterDF = (int) termBitset.cardinality();

      TermEntry entry = new TermEntry(term.utf8ToString(), count++, inclusterDF);
      termEntryMap.put(entry.getTerm(), entry);

    }

    List<TermInfoClusterInOut> clusteredTermInfo = Lists.newLinkedList();
View Full Code Here

        Text term = new Text();
        IntWritable termIndex = new IntWritable();

        Iterator<TermEntry> termEntries = termInfo.getAllEntries();
        while (termEntries.hasNext()) {
          TermEntry termEntry = termEntries.next();
          term.set(termEntry.getTerm());
          termIndex.set(termEntry.getTermIdx());
          seqWriter.append(term, termIndex);
        }
      } finally {
        Closeables.close(seqWriter, false);
      }
View Full Code Here

    writer.write(String.valueOf(ti.totalTerms(field)));
    writer.write("\n");
    writer.write("#term" + delimiter + "doc freq" + delimiter + "idx");
    writer.write("\n");
    while (entIter.hasNext()) {
      TermEntry entry = entIter.next();
      writer.write(entry.term);
      writer.write(delimiter);
      writer.write(String.valueOf(entry.docFreq));
      writer.write(delimiter);
      writer.write(String.valueOf(entry.termIdx));
View Full Code Here

      }
      int df = te.docFreq();
      if (df < minDf || df > percent){
        continue;
      }
      TermEntry entry = new TermEntry(term.text(), count++, df);
      termEntries.put(entry.term, entry);
    } while (te.next());
    te.close();
  }
View Full Code Here

    this.numTerms = numTerms;
  }

  @Override
  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
    TermEntry entry = termInfo.getTermEntry(field, term);
    if (entry != null) {
      vector.setQuick(entry.termIdx, weight.calculate(frequency, entry.docFreq, numTerms, numDocs));
    }
  }
View Full Code Here

    writer.write(String.valueOf(ti.totalTerms(field)));
    writer.write('\n');
    writer.write("#term" + delimiter + "doc freq" + delimiter + "idx");
    writer.write('\n');
    while (entIter.hasNext()) {
      TermEntry entry = entIter.next();
      writer.write(entry.getTerm());
      writer.write(delimiter);
      writer.write(String.valueOf(entry.getDocFreq()));
      writer.write(delimiter);
      writer.write(String.valueOf(entry.getTermIdx()));
      writer.write('\n');
    }
    writer.flush();
    writer.close();
  }
View Full Code Here

      // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
      // This modifies the termBitset, but that's fine as we are not using it anywhere else.
      termBitset.and(clusterDocBitset);
      int inclusterDF = (int) termBitset.cardinality();

      TermEntry entry = new TermEntry(term.text(), count++, inclusterDF);
      termEntryMap.put(entry.getTerm(), entry);
    } while (te.next());
    te.close();

    List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<TermInfoClusterInOut>();
View Full Code Here

      }
      int df = te.docFreq();
      if (df < minDf || df > percent) {
        continue;
      }
      TermEntry entry = new TermEntry(term.text(), count++, df);
      termEntries.put(entry.getTerm(), entry);
    } while (te.next());
    te.close();
  }
View Full Code Here

    this.numTerms = numTerms;
  }
 
  @Override
  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
    TermEntry entry = termInfo.getTermEntry(field, term);
    if (entry != null) {
      vector.setQuick(entry.getTermIdx(), weight.calculate(frequency, entry.getDocFreq(), numTerms, numDocs));
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.utils.vectors.TermEntry

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.