Package org.apache.mahout.text.doc

Examples of org.apache.mahout.text.doc.SingleFieldDocument


    HadoopUtil.delete(configuration, indexPath1);
  }

  @Test
  public void testGetSegment() throws Exception {
    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple document 1");
    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple document 2");
    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple document 3");

    List<SingleFieldDocument> docs = asList(doc1, doc2, doc3);
    for (SingleFieldDocument doc : docs) {
      commitDocuments(getDirectory(getIndexPath1AsFile()), doc);
    }
View Full Code Here


    assertSegmentContainsOneDoc("_2");
  }

  @Test(expected = IllegalArgumentException.class)
  public void testGetSegmentNonExistingSegment() throws Exception {
    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple document 1");
    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple document 2");
    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple document 3");

    List<SingleFieldDocument> docs = asList(doc1, doc2, doc3);
    for (SingleFieldDocument doc : docs) {
      commitDocuments(getDirectory(getIndexPath1AsFile()), doc);
    }
View Full Code Here

    HadoopUtil.delete(conf, indexPath1);
  }

  @Test
  public void testGetSplits() throws IOException, InterruptedException {
    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple document 1");
    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple document 2");
    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple document 3");

    //generate 3 segments
    commitDocuments(getDirectory(getIndexPath1AsFile()), doc1);
    commitDocuments(getDirectory(getIndexPath1AsFile()), doc2);
    commitDocuments(getDirectory(getIndexPath1AsFile()), doc3);
View Full Code Here

      public LuceneStorageConfiguration newLucene2SeqConfiguration(Configuration configuration, List<Path> indexPaths, Path seqPath, String idField, List<String> fields) {
        lucene2SeqConf = new LuceneStorageConfiguration(configuration, indexPaths, seqPath, idField, fields);
        return lucene2SeqConf;
      }
    };
    commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("1", "Mahout is cool"));
    commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("2", "Mahout is cool"));
  }
View Full Code Here

  public void setUp() throws Exception {
    super.setUp();
    indexPath1 = getTestTempDirPath("index1");
    indexPath2 = getTestTempDirPath("index2");
    for (int i = 0; i < 2000; i++) {
      docs.add(new SingleFieldDocument(String.valueOf(i), "This is test document " + i));
    }
    misshapenDocs.add(new SingleFieldDocument("", "This doc has an empty id"));
    misshapenDocs.add(new SingleFieldDocument("empty_value", ""));
  }
View Full Code Here

    HadoopUtil.delete(configuration, indexPath1);
  }

  @Test
  public void testGetSegment() throws Exception {
    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple document 1");
    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple document 2");
    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple document 3");

    List<SingleFieldDocument> docs = asList(doc1, doc2, doc3);
    for (SingleFieldDocument doc : docs) {
      commitDocuments(getDirectory(getIndexPath1AsFile()), doc);
    }
View Full Code Here

    assertSegmentContainsOneDoc("_2");
  }

  @Test(expected = IllegalArgumentException.class)
  public void testGetSegmentNonExistingSegment() throws Exception {
    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple document 1");
    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple document 2");
    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple document 3");

    List<SingleFieldDocument> docs = asList(doc1, doc2, doc3);
    for (SingleFieldDocument doc : docs) {
      commitDocuments(getDirectory(getIndexPath1AsFile()), doc);
    }
View Full Code Here

    HadoopUtil.delete(conf, indexPath1);
  }

  @Test
  public void testGetSplits() throws IOException, InterruptedException {
    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple document 1");
    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple document 2");
    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple document 3");

    //generate 3 segments
    commitDocuments(getDirectory(getIndexPath1AsFile()), doc1);
    commitDocuments(getDirectory(getIndexPath1AsFile()), doc2);
    commitDocuments(getDirectory(getIndexPath1AsFile()), doc3);
View Full Code Here

  public void setUp() throws Exception {
    super.setUp();
    indexPath1 = getTestTempDirPath("index1");
    indexPath2 = getTestTempDirPath("index2");
    for (int i = 0; i < 2000; i++) {
      docs.add(new SingleFieldDocument(String.valueOf(i), "This is test document " + i));
    }
    misshapenDocs.add(new SingleFieldDocument("", "This doc has an empty id"));
    misshapenDocs.add(new SingleFieldDocument("empty_value", ""));
  }
View Full Code Here

      public LuceneStorageConfiguration newLucene2SeqConfiguration(Configuration configuration, List<Path> indexPaths, Path seqPath, String idField, List<String> fields) {
        lucene2SeqConf = new LuceneStorageConfiguration(configuration, indexPaths, seqPath, idField, fields);
        return lucene2SeqConf;
      }
    };
    commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("1", "Mahout is cool"));
    commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("2", "Mahout is cool"));
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.text.doc.SingleFieldDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.