Package edu.buffalo.cse.ir.wikiindexer.tokenizer

Examples of edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream


    try {
      while (remaining > 0) {
        idoc = pool.take().get();
        if (idoc != null) {
          currDocId = docDict.lookup(idoc.getDocumentIdentifier());
          TokenStream stream;
          try {
            for (INDEXFIELD fld : INDEXFIELD.values()) {
              stream = idoc.getStream(fld);

              if (stream != null) {
                tokenmap = stream.getTokenMap();

                if (tokenmap != null) {
                  switch (fld) {
                  case TERM:
                    termRunner.addToIndex(tokenmap,


    for (int i = 0; i < numdocs; i++) {
      try {
        idoc = pool.take().get();
        if (idoc != null) {
          currDocId = docDict.lookup(idoc.getDocumentIdentifier());
          TokenStream stream;
          try {
            for (INDEXFIELD fld : INDEXFIELD.values()) {
              stream = idoc.getStream(fld);

              if (stream != null) {
                tokenmap = stream.getTokenMap();

                if (tokenmap != null) {
                  switch (fld) {
                  case TERM:
                    termRunner.addToIndex(tokenmap,
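Both excerpts above are cut off at the addToIndex call, but they share one shape: drain completed parse tasks from a completion service, look up the numeric document id, and hand each non-null field's token map to the index runner registered for that field. The following is a minimal sketch of that loop; the IndexableDocument and IndexRunner interfaces, the INDEXFIELD constants, and the plain Map standing in for the project's document dictionary are all assumptions, not the project's actual signatures.

import java.util.Map;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;

import edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream;

// Hypothetical stand-ins for the project types used in the excerpts above.
enum INDEXFIELD { TERM, LINK, CATEGORY, AUTHOR }

interface IndexableDocument {
  String getDocumentIdentifier();
  TokenStream getStream(INDEXFIELD fld);
}

interface IndexRunner {
  void addToIndex(Map<String, Integer> tokenmap, int docId);
}

class IndexerLoopSketch {
  /** Drains numdocs completed parse tasks and dispatches each field's token map. */
  static void consume(CompletionService<IndexableDocument> pool,
                      int numdocs,
                      Map<INDEXFIELD, IndexRunner> runners,
                      Map<String, Integer> docDict)
      throws InterruptedException, ExecutionException {
    for (int i = 0; i < numdocs; i++) {
      IndexableDocument idoc = pool.take().get();
      if (idoc == null) {
        continue;
      }
      int currDocId = docDict.get(idoc.getDocumentIdentifier());
      for (INDEXFIELD fld : INDEXFIELD.values()) {
        TokenStream stream = idoc.getStream(fld);
        if (stream == null) {
          continue;
        }
        Map<String, Integer> tokenmap = stream.getTokenMap();
        if (tokenmap != null) {
          runners.get(fld).addToIndex(tokenmap, currDocId);
        }
      }
    }
  }
}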

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#reset()}.
   */
  @Test
  public void testReset() {
    //empty / null
    TokenStream stream = new TokenStream((String)null);
    stream.reset();
    assertNull(stream.next());
    stream = null;
   
    stream = new TokenStream("");
    stream.reset();
    assertNull(stream.next());
    stream = null;
   
    //positive run
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.next();
    stream.reset();
    assertEquals("this", stream.next());
    stream = null;
  }
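
The contract pinned down above, next() returning null on a null or empty stream and reset() rewinding so the first token comes back, can be met with a cursor over a token list. A minimal sketch assuming a list-plus-index representation; the class and field names are illustrative, not the project's implementation.

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only; the real TokenStream may be organised differently.
class ResetSketch {
  private final List<String> tokens = new ArrayList<>();
  private int pos = 0; // index of the token next() will return

  ResetSketch(String first) {
    if (first != null && !first.isEmpty()) {
      tokens.add(first); // a null or empty seed leaves the stream empty
    }
  }

  /** Rewind the cursor so iteration starts again from the first token. */
  public void reset() {
    pos = 0;
  }

  /** Next token in order, or null when the stream is empty or exhausted. */
  public String next() {
    return (pos < tokens.size()) ? tokens.get(pos++) : null;
  }
}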

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#seekEnd()}.
   */
  @Test
  public void testSeekEnd() {
    //empty / null
    TokenStream stream = new TokenStream((String)null);
    stream.seekEnd();
    assertNull(stream.next());
    stream = null;
   
    stream = new TokenStream("");
    stream.seekEnd();
    assertNull(stream.next());
    stream = null;
   
    //positive run
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    assertEquals("stream", stream.previous());
    stream = null;
  }
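
seekEnd() is the mirror image: afterwards next() has nothing left, while previous() hands back the last token, which is what the positive run asserts. Continuing the same list-plus-cursor assumption:

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only.
class SeekEndSketch {
  private final List<String> tokens = new ArrayList<>();
  private int pos = 0; // index of the token next() would return

  /** Move the cursor past the last token. */
  public void seekEnd() {
    pos = tokens.size();
  }

  /** Token just before the cursor, or null at the start of the stream. */
  public String previous() {
    return (pos > 0) ? tokens.get(--pos) : null;
  }

  /** Token at the cursor, or null when the stream is empty or exhausted. */
  public String next() {
    return (pos < tokens.size()) ? tokens.get(pos++) : null;
  }
}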

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#merge(edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream)}.
   */
  @Test
  public void testMerge() {
    //merge with null
    TokenStream stream1 = new TokenStream("this");
    stream1.append("is","a","test","stream");
    stream1.merge(null);
    assertEquals(5, stream1.getAllTokens().size());
   
    TokenStream stream2 = new TokenStream((String) null);
    stream1.merge(stream2);
    assertEquals(5, stream1.getAllTokens().size());
   
    stream2.merge(stream1);
    assertEquals(5, stream2.getAllTokens().size());
    stream1 = null; stream2 = null;
   
    //proper merge
    stream1 = new TokenStream("this");
    stream1.append("is","a");
    stream2 = new TokenStream("test");
    stream2.append("stream");
   
    stream1.merge(stream2);
    assertEquals(5, stream1.getAllTokens().size());
    assertEquals(5, stream1.getTokenMap().size());
    assertEquals(2, stream2.getAllTokens().size());
    assertEquals(2, stream2.getTokenMap().size());
    assertFalse(stream1.hasPrevious());
   
    for (int i = 0; i < 4; i++)
      stream1.mergeWithNext();
   
    stream1.reset();
    assertEquals("this is a test stream", stream1.next());
    stream1 = null; stream2 = null;
   
    //self merge
    stream1 = new TokenStream("this");
    stream1.append("is","a","test","stream");
    stream2 = new TokenStream("this");
    stream2.append("is","a","test","stream");
    stream1.merge(stream2);
    assertEquals(10, stream1.getAllTokens().size());
    assertEquals(5, stream1.getTokenMap().size());
    assertEquals(5, stream2.getAllTokens().size());
    assertEquals(5, stream2.getTokenMap().size());
    stream1 = null; stream2 = null;
  }
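
Two behaviours carry the assertions above: merge() appends a copy of the other stream's tokens and leaves the argument untouched (the size checks on stream2 confirm this, and a null or empty argument is a no-op), while mergeWithNext() collapses the token at the cursor and its successor into one space-joined token, which is why four calls reduce the five merged tokens to the single string "this is a test stream". A minimal sketch of both under the same list-plus-cursor assumption:

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch; the real merge() may also reconcile cursors and cached maps.
class MergeSketch {
  private final List<String> tokens = new ArrayList<>();
  private int pos = 0; // index of the token next() would return

  /** Append a copy of the other stream's tokens; the argument is not modified. */
  public void merge(MergeSketch other) {
    if (other != null) {
      tokens.addAll(other.tokens);
    }
  }

  /** Join the token at the cursor with the one after it, separated by a space. */
  public void mergeWithNext() {
    if (pos + 1 < tokens.size()) {
      tokens.set(pos, tokens.get(pos) + " " + tokens.get(pos + 1));
      tokens.remove(pos + 1);
    }
  }
}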

    }
  }
 
 
  protected Object[] runtest(String... input) throws TokenizerException {
    TokenStream stream = new TokenStream(input[0]);
    if (input.length > 1) {
      stream.append(Arrays.copyOfRange(input, 1, input.length));
    }
   
    rule.apply(stream);
    Collection<String> strtokens = stream.getAllTokens();
    return (strtokens != null) ? strtokens.toArray() : new Object[]{};
  }
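
runtest() folds the boilerplate of a rule test into one call: seed a stream from the varargs, apply the rule under test, and return whatever tokens survive. A concrete rule test then reduces to a single assertion, along the lines of the following; the lower-casing rule and the expected tokens are purely illustrative.

// Hypothetical usage from a test class whose `rule` field is a lower-casing rule;
// assumes a static import of org.junit.Assert.assertArrayEquals.
@Test
public void testLowercasing() throws TokenizerException {
  assertArrayEquals(new Object[] { "hello", "world" },
                    runtest("Hello", "World"));
}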

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#append(java.lang.String[])}.
   */
  @SuppressWarnings("deprecation")
  @Test
  public void testAppend() {
    //appending null
    TokenStream stream = new TokenStream("test");
    stream.append((String[])null);
    assertEquals(new Object[]{"test"}, stream.getAllTokens().toArray());
    stream = null;
   
    //appending empty string
    stream = new TokenStream("test");
    stream.append("");
    assertEquals(new Object[]{"test"}, stream.getAllTokens().toArray());
    stream = null;
   
    //one token
    stream = new TokenStream("test");
    stream.append("string");
    assertEquals(new Object[]{"test", "string"}, stream.getAllTokens().toArray());
    stream = null;
   
    //multiple tokens
    stream = new TokenStream("test");
    stream.append("string","with","multiple","tokens");
    assertEquals(new Object[]{"test", "string","with","multiple","tokens"}, stream.getAllTokens().toArray());
    stream = null;
   
    //intermediate nulls and emptys
    stream = new TokenStream("test");
    stream.append("string","with",null,"and","","tokens");
    assertEquals(new Object[]{"test", "string","with","and","tokens"}, stream.getAllTokens().toArray());
    stream = null;
  }
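
The cases above fix append()'s edge behaviour: a null array is a no-op, and null or empty strings inside the array are dropped rather than stored. A minimal sketch of that filtering; the names are illustrative.

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only.
class AppendSketch {
  private final List<String> tokens = new ArrayList<>();

  AppendSketch(String first) {
    if (first != null && !first.isEmpty()) {
      tokens.add(first);
    }
  }

  /** Append tokens in order, silently skipping null and empty strings. */
  public void append(String... toks) {
    if (toks == null) {
      return;
    }
    for (String t : toks) {
      if (t != null && !t.isEmpty()) {
        tokens.add(t);
      }
    }
  }

  /** Snapshot of all tokens currently in the stream. */
  public List<String> getAllTokens() {
    return new ArrayList<>(tokens);
  }
}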

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#getTokenMap()}.
   */
  @Test
  public void testGetTokenMap() {
    //null string based stream
    TokenStream stream = new TokenStream((String)null);
    assertEquals(null, stream.getTokenMap());
    stream = null;
   
    //empty string
    stream = new TokenStream("");
    assertEquals(null, stream.getTokenMap());
    stream = null;
   
    //unique tokens
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "string");
    Map<String, Integer> smap = getSortedMap(stream.getTokenMap());
    assertEquals("[a, is, string, test, this]", smap.keySet().toString());
    assertEquals("[1, 1, 1, 1, 1]", smap.values().toString());
    stream = null;
    smap = null;
   
    //same token repeated
    stream = new TokenStream("hello");
    stream.append("hello", "hello", "hello", "hello");
    smap = getSortedMap(stream.getTokenMap());
    assertEquals("[hello]", smap.keySet().toString());
    assertEquals("[5]", smap.values().toString());
    stream = null;
    smap = null;
   
    //combination
    stream = new TokenStream("to");
    stream.append("be", "or", "not", "to", "be");
    smap = getSortedMap(stream.getTokenMap());
    assertEquals("[be, not, or, to]", smap.keySet().toString());
    assertEquals("[2, 1, 1, 2]", smap.values().toString());
    stream = null;
    smap = null;
   
    //with remove
    stream = new TokenStream("to");
    stream.append("be", "or", "not", "to", "be");
    stream.remove();
    smap = getSortedMap(stream.getTokenMap());
    assertEquals("[be, not, or, to]", smap.keySet().toString());
    assertEquals("[2, 1, 1, 1]", smap.values().toString());
    stream.seekEnd();
    stream.previous(); //be
    stream.previous(); //to
    stream.remove();
    stream.previous();
    stream.remove();
    smap = getSortedMap(stream.getTokenMap());
    assertEquals("[be, or]", smap.keySet().toString());
    assertEquals("[2, 1]", smap.values().toString());
    stream = null;
    smap = null;
   
    //with merge with previous
    stream = new TokenStream("to");
    stream.append("be", "or", "not", "to", "be");
    stream.next(); //at be
    stream.mergeWithPrevious();
    stream.seekEnd();
    stream.previous();
    stream.mergeWithPrevious();
    smap = getSortedMap(stream.getTokenMap());
    assertEquals("[not, or, to be]", smap.keySet().toString());
    assertEquals("[1, 1, 2]", smap.values().toString());
    stream = null;
   
    //with merge with next
    stream = new TokenStream("to");
    stream.append("be", "or", "not", "to", "be");
    stream.mergeWithNext();
    stream.seekEnd();
    stream.previous();
    stream.previous();
    stream.mergeWithNext();
    smap = getSortedMap(stream.getTokenMap());
    assertEquals("[not, or, to be]", smap.keySet().toString());
    assertEquals("[1, 1, 2]", smap.values().toString());
    stream = null;
  }
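
getTokenMap() is a term-frequency view of the stream: null for an empty stream, otherwise each distinct token mapped to its count, and it reflects later remove() and merge*() edits because it is derived from the current tokens. The getSortedMap() helper used above presumably just copies the result into a TreeMap so the assertions have a stable key order. A minimal frequency-count sketch (the static-helper form is illustrative):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative sketch only; the real TokenStream may build or cache this differently.
class TokenMapSketch {
  /** Count each distinct token; returns null, not an empty map, for an empty stream. */
  static Map<String, Integer> getTokenMap(List<String> tokens) {
    if (tokens == null || tokens.isEmpty()) {
      return null;
    }
    Map<String, Integer> map = new HashMap<>();
    for (String t : tokens) {
      map.merge(t, 1, Integer::sum);
    }
    return map;
  }
}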

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#query(java.lang.String)}.
   */
  @Test
  public void testQuery() {
    //null string based stream
    TokenStream stream = new TokenStream((String)null);
    assertEquals(0, stream.query("test"));
    stream = null;
   
    //empty string
    stream = new TokenStream("");
    assertEquals(0, stream.query("test"));
    stream = null;
   
    //unique tokens
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "string");
   
    assertEquals(1, stream.query("test"));
    assertEquals(0, stream.query("hello"));
    stream = null;
   
    //same token repeated
    stream = new TokenStream("hello");
    stream.append("hello","hello","hello","hello");
    assertEquals(0, stream.query("test"));
    assertEquals(5, stream.query("hello"));
    stream = null;
   
    //combination
    stream = new TokenStream("to");
    stream.append("be", "or", "not", "to", "be");
    assertEquals(2, stream.query("be"));
    assertEquals(1, stream.query("not"));
    assertEquals(0, stream.query("test"));
    stream = null;
   
    //with remove
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "string");
    stream.remove(); //this removed
    assertEquals(0, stream.query("this"));
    stream = null;
   
    //with merge with previous
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "string");
    stream.next();
    stream.mergeWithPrevious();
    assertEquals(0, stream.query("this"));
    assertEquals(1, stream.query("this is"));
    stream = null;
   
    //with merge with next
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "string");
    stream.mergeWithNext();
    assertEquals(0, stream.query("this"));
    assertEquals(1, stream.query("this is"));
    stream = null;
  }
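
query() is the single-token counterpart of the map: the number of occurrences of the given token, 0 when the stream is empty or the token is absent, and it sees the same post-remove()/merge view of the tokens. A minimal sketch:

import java.util.List;

// Illustrative sketch only; assumes query() counts exact, case-sensitive matches.
class QuerySketch {
  /** Number of times the given token occurs in the stream, 0 if absent. */
  static int query(List<String> tokens, String token) {
    if (tokens == null || token == null) {
      return 0;
    }
    int count = 0;
    for (String t : tokens) {
      if (t.equals(token)) {
        count++;
      }
    }
    return count;
  }
}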

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#hasNext()}.
   */
  @Test
  public void testHasNext() {
    //null
    TokenStream stream = new TokenStream((String)null);
    assertEquals(false, stream.hasNext());
    stream = null;
   
    //empty
    stream = new TokenStream("");
    assertEquals(false, stream.hasNext());
    stream = null;
   
    //some text and iteration
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    assertTrue(stream.hasNext());
    stream.next(); //after this
    assertTrue(stream.hasNext());
    stream.next(); //after is
    assertTrue(stream.hasNext());
    stream.next(); //after a
    assertTrue(stream.hasNext());
    stream.next(); //after test
    assertTrue(stream.hasNext());
    stream.next(); //after stream
    assertFalse(stream.hasNext());
    stream = null;
   
    //with seek
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    assertFalse(stream.hasNext());
    stream = null;
   
    //forward and reverse
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    while (stream.hasNext()) {
      stream.next();
    }
   
    stream.previous();
    assertTrue(stream.hasNext());
    stream.next();
    assertFalse(stream.hasNext());
    stream = null;
   
    //with remove
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    stream.previous();
    stream.remove();
    assertFalse(stream.hasNext());
    stream = null;
   
    //with merge with previous
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.next();
    stream.mergeWithPrevious();
    assertTrue(stream.hasNext());
    stream.seekEnd();
    stream.previous();
    stream.mergeWithPrevious();
    assertTrue(stream.hasNext());
    stream = null;
   
    //with merge with next
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.mergeWithNext();
    assertTrue(stream.hasNext());
    stream.seekEnd();
    stream.previous();
    stream.previous();
    stream.mergeWithNext();
    stream.next();
    assertFalse(stream.hasNext());
    stream = null;
  }
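
hasNext() just asks whether the cursor still has a token ahead of it, which is why previous() re-enables it, seekEnd() disables it, and remove() at the last position leaves nothing in front. Completing the list-plus-cursor sketch with the iterator trio these tests rely on (again an assumption about the internals):

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only.
class CursorSketch {
  private final List<String> tokens = new ArrayList<>();
  private int pos = 0; // index of the token next() would return

  public boolean hasNext() {
    return pos < tokens.size();
  }

  public String next() {
    return hasNext() ? tokens.get(pos++) : null;
  }

  public String previous() {
    return (pos > 0) ? tokens.get(--pos) : null;
  }

  public void seekEnd() {
    pos = tokens.size();
  }

  /** Remove the token at the cursor position, if there is one. */
  public void remove() {
    if (pos < tokens.size()) {
      tokens.remove(pos);
    }
  }
}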
