Package org.apache.mahout.utils.vectors

Examples of org.apache.mahout.utils.vectors.TermInfo


      writer.addDocument(doc);
    }
    writer.close();
    IndexReader reader = IndexReader.open(directory, true);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);

    int numTerms = 0;
    for (Iterator<TermEntry> it = termInfo.getAllEntries(); it.hasNext();) {
      it.next();
      numTerms++;
    }
    termDictionary = new String[numTerms];
    int i = 0;
    for (Iterator<TermEntry> it = termInfo.getAllEntries(); it.hasNext();) {
      String term = it.next().getTerm();
      termDictionary[i] = term;
      System.out.println(i + " " + term);
      i++;
    }
View Full Code Here


  @Test
  public void testIterable() throws Exception {
    IndexReader reader = IndexReader.open(directory, true);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content", mapper);

    //TODO: do something more meaningful here
    for (Vector vector : iterable) {
View Full Code Here

  public void testIterable_noTermVectors() throws IOException {
    RAMDirectory directory = createTestIndex(Field.TermVector.NO);

    IndexReader reader = IndexReader.open(directory, true);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content", mapper);

    Iterator<Vector> iterator = iterable.iterator();
    iterator.hasNext();
View Full Code Here

    //get real vectors
    createTestIndex(Field.TermVector.NO, directory, false, 5);
     
    IndexReader reader = IndexReader.open(directory, true);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
   
    boolean exceptionThrown;
    //0 percent tolerance
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content", mapper);
View Full Code Here

   
    IndexReader reader = DirectoryReader.open(directory);
  

    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
   
    int numTerms = 0;
    for (Iterator<TermEntry> it = termInfo.getAllEntries(); it.hasNext();) {
      it.next();
      numTerms++;
    }
    termDictionary = new String[numTerms];
    int i = 0;
    for (Iterator<TermEntry> it = termInfo.getAllEntries(); it.hasNext();) {
      String term = it.next().getTerm();
      termDictionary[i] = term;
      System.out.println(i + " " + term);
      i++;
    }
View Full Code Here

  @Test
  public void testIterable() throws Exception {
    IndexReader reader = DirectoryReader.open(directory);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content", termInfo,weight);

    //TODO: do something more meaningful here
    for (Vector vector : iterable) {
      assertNotNull(vector);
View Full Code Here

  public void testIterableNoTermVectors() throws IOException {
    RAMDirectory directory = createTestIndex(TYPE_NO_TERM_VECTORS);
    IndexReader reader = DirectoryReader.open(directory);

    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content",  termInfo,weight);

    Iterator<Vector> iterator = iterable.iterator();
    Iterators.advance(iterator, 1);
  }
View Full Code Here

    //get real vectors
    createTestIndex(TYPE_NO_TERM_VECTORS, directory, 5);
    IndexReader reader = DirectoryReader.open(directory);

    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
   
    boolean exceptionThrown;
    //0 percent tolerance
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content", termInfo, weight);
    try {
View Full Code Here

    } finally {
      Closeables.closeQuietly(writer);
    }
    IndexReader reader = IndexReader.open(directory, true);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
   
    int numTerms = 0;
    for (Iterator<TermEntry> it = termInfo.getAllEntries(); it.hasNext();) {
      it.next();
      numTerms++;
    }
    termDictionary = new String[numTerms];
    int i = 0;
    for (Iterator<TermEntry> it = termInfo.getAllEntries(); it.hasNext();) {
      String term = it.next().getTerm();
      termDictionary[i] = term;
      System.out.println(i + " " + term);
      i++;
    }
View Full Code Here

    } finally {
      Closeables.closeQuietly(writer);
    }
    IndexReader reader = IndexReader.open(directory, true);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
    Iterable<Vector> iterable = new LuceneIterable(reader, "id", "content", mapper);
   
    int i = 0;
    for (Vector vector : iterable) {
View Full Code Here

TOP

Related Classes of org.apache.mahout.utils.vectors.TermInfo

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.