Examples of WebPage


Examples of org.apache.gora.examples.generated.WebPage

    String revUrl = "foo.com:http/";
    String url = "http://foo.com/";

    webPageStore.createSchema();
    WebPage page = webPageStore.newPersistent();
    Metadata metadata = new Metadata()
    metadata.setVersion(1);
    metadata.putToData(new Utf8("foo"), new Utf8("baz"));

    page.setMetadata(metadata);
    page.setUrl(new Utf8(url));

    webPageStore.put(revUrl, page);
    webPageStore.flush();

    page = webPageStore.get(revUrl);
    metadata = page.getMetadata();
    Assert.assertNotNull(metadata);
    Assert.assertEquals(1, metadata.getVersion());
    Assert.assertEquals(new Utf8("baz"), metadata.getData().get(new Utf8("foo")));
  }
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

  @Test
  public void testPutArray() throws IOException, Exception {
    log.info("test method: testPutArray");
    webPageStore.createSchema();
    WebPage page = webPageStore.newPersistent();

    String[] tokens = {"example", "content", "in", "example.com"};

    for(String token: tokens) {
      page.addToParsedContent(new Utf8(token));
    }

    webPageStore.put("com.example/http", page);
    webPageStore.close();
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

  @Test
  public void testPutBytes() throws IOException, Exception {
    log.info("test method: testPutBytes");
    webPageStore.createSchema();
    WebPage page = webPageStore.newPersistent();
    page.setUrl(new Utf8("http://example.com"));
    byte[] contentBytes = "example content in example.com".getBytes();
    ByteBuffer buff = ByteBuffer.wrap(contentBytes);
    page.setContent(buff);

    webPageStore.put("com.example/http", page);
    webPageStore.close();

    assertPutBytes(contentBytes);
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

  @Test
  public void testPutMap() throws IOException, Exception {
    log.info("test method: testPutMap");
    webPageStore.createSchema();

    WebPage page = webPageStore.newPersistent();

    page.setUrl(new Utf8("http://example.com"));
    page.putToOutlinks(new Utf8("http://example2.com"), new Utf8("anchor2"));
    page.putToOutlinks(new Utf8("http://example3.com"), new Utf8("anchor3"));
    page.putToOutlinks(new Utf8("http://example3.com"), new Utf8("anchor4"));
    webPageStore.put("com.example/http", page);
    webPageStore.close();

    assertPutMap();
  }
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

    int parsedContentCount = 0;


    for (int i = 0; i < urls.length; i++) {
      WebPage webPage = dataStore.newPersistent();
      webPage.setUrl(new Utf8(urls[i]));
      for (parsedContentCount = 0; parsedContentCount < 5; parsedContentCount++) {
        webPage.addToParsedContent(new Utf8(parsedContent + i + "," + parsedContentCount));
      }
      for (int j = 0; j < urls.length; j += 2) {
        webPage.putToOutlinks(new Utf8(anchor + j), new Utf8(urls[j]));
      }
      dataStore.put(webPage.getUrl().toString(), webPage);
    }

    dataStore.flush();

    for (int i = 0; i < urls.length; i++) {
      WebPage webPage = dataStore.get(urls[i]);
      webPage.setContent(ByteBuffer.wrap(ByteUtils.toBytes(content + i)));
      for (parsedContentCount = 5; parsedContentCount < 10; parsedContentCount++) {
        webPage.addToParsedContent(new Utf8(parsedContent + i + "," + parsedContentCount));
      }
      webPage.getOutlinks().clear();
      for (int j = 1; j < urls.length; j += 2) {
        webPage.putToOutlinks(new Utf8(anchor + j), new Utf8(urls[j]));
      }
      dataStore.put(webPage.getUrl().toString(), webPage);
    }

    dataStore.flush();

    for (int i = 0; i < urls.length; i++) {
      WebPage webPage = dataStore.get(urls[i]);
      Assert.assertEquals(content + i, ByteUtils.toString( toByteArray(webPage.getContent()) ));
      Assert.assertEquals(10, webPage.getParsedContent().size());
      int j = 0;
      for (Utf8 pc : webPage.getParsedContent()) {
        Assert.assertEquals(parsedContent + i + "," + j, pc.toString());
        j++;
      }
      int count = 0;
      for (j = 1; j < urls.length; j += 2) {
        Utf8 link = webPage.getOutlinks().get(new Utf8(anchor + j));
        Assert.assertNotNull(link);
        Assert.assertEquals(urls[j], link.toString());
        count++;
      }
      Assert.assertEquals(count, webPage.getOutlinks().size());
    }

    for (int i = 0; i < urls.length; i++) {
      WebPage webPage = dataStore.get(urls[i]);
      for (int j = 0; j < urls.length; j += 2) {
        webPage.putToOutlinks(new Utf8(anchor + j), new Utf8(urls[j]));
      }
      dataStore.put(webPage.getUrl().toString(), webPage);
    }

    dataStore.flush();

    for (int i = 0; i < urls.length; i++) {
      WebPage webPage = dataStore.get(urls[i]);
      int count = 0;
      for (int j = 0; j < urls.length; j++) {
        Utf8 link = webPage.getOutlinks().get(new Utf8(anchor + j));
        Assert.assertNotNull(link);
        Assert.assertEquals(urls[j], link.toString());
        count++;
      }
    }
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

  private static void testGetWebPage(DataStore<String, WebPage> store, String[] fields)
    throws IOException, Exception {
    createWebPageData(store);

    for(int i=0; i<URLS.length; i++) {
      WebPage page = store.get(URLS[i], fields);
      assertWebPage(page, i);
    }
  }
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

      Query<String, WebPage> query = store.newQuery();
      query.setFields(fields);
      query.setKey(URLS[i]);
      Result<String, WebPage> result = query.execute();
      Assert.assertTrue(result.next());
      WebPage page = result.get();
      assertWebPage(page, i);
      Assert.assertFalse(result.next());
    }
  }
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

          query.setEndKey(sortedUrls.get(j));
        Result<String, WebPage> result = query.execute();

        int r=0;
        while(result.next()) {
          WebPage page = result.get();
          assertWebPage(page, URL_INDEXES.get(page.getUrl().toString()));
          r++;
        }

        int expectedLength = (setEndKeys ? j+1: sortedUrls.size()) -
                             (setStartKeys ? i: 0);
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

   
    assertNumResults(store.newQuery(), URLS.length);

    //assert that data is deleted
    for (int i = 0; i < SORTED_URLS.length; i++) {
      WebPage page = store.get(SORTED_URLS[i]);
      Assert.assertNotNull(page);

      Assert.assertNotNull(page.getUrl());
      Assert.assertEquals(page.getUrl().toString(), SORTED_URLS[i]);
      Assert.assertEquals(0, page.getOutlinks().size());
      Assert.assertEquals(0, page.getParsedContent().size());
      if(page.getContent() != null) {
        System.out.println("url:" + page.getUrl().toString());
        System.out.println( "limit:" + page.getContent().limit());
      } else {
        Assert.assertNull(page.getContent());
      }
    }

    //test 6 - delete some with some fields
    WebPageDataCreator.createWebPageData(store);

    query = store.newQuery();
    query.setFields(WebPage.Field.URL.getName());
    String startKey = SORTED_URLS[NUM_KEYS];
    String endKey = SORTED_URLS[SORTED_URLS.length - NUM_KEYS];
    query.setStartKey(startKey);
    query.setEndKey(endKey);

    assertNumResults(store.newQuery(), URLS.length);
    store.deleteByQuery(query);
    store.deleteByQuery(query);
    store.deleteByQuery(query);//don't you love that HBase sometimes does not delete arbitrarily
   
    store.flush();

    assertNumResults(store.newQuery(), URLS.length);

    //assert that data is deleted
    for (int i = 0; i < URLS.length; i++) {
      WebPage page = store.get(URLS[i]);
      Assert.assertNotNull(page);
      if( URLS[i].compareTo(startKey) < 0 || URLS[i].compareTo(endKey) >= 0) {
        //not deleted
        assertWebPage(page, i);
      } else {
        //deleted
        Assert.assertNull(page.getUrl());
        Assert.assertNotNull(page.getOutlinks());
        Assert.assertNotNull(page.getParsedContent());
        Assert.assertNotNull(page.getContent());
        Assert.assertTrue(page.getOutlinks().size() > 0);
        Assert.assertTrue(page.getParsedContent().size() > 0);
      }
    }

  }
View Full Code Here

Examples of org.apache.gora.examples.generated.WebPage

  }
 
  public static void createWebPageData(DataStore<String, WebPage> dataStore)
  throws IOException {
    try{
      WebPage page;
      log.info("creating web page data");
     
      for(int i=0; i<URLS.length; i++) {
        page = new WebPage();
        page.setUrl(new Utf8(URLS[i]));
        page.setContent(ByteBuffer.wrap(CONTENTS[i].getBytes()));
        for(String token : CONTENTS[i].split(" ")) {
          page.addToParsedContent(new Utf8(token))
        }
       
        for(int j=0; j<LINKS[i].length; j++) {
          page.putToOutlinks(new Utf8(URLS[LINKS[i][j]]), new Utf8(ANCHORS[i][j]));
        }
       
        Metadata metadata = new Metadata();
        metadata.setVersion(1);
        metadata.putToData(new Utf8("metakey"), new Utf8("metavalue"));
        page.setMetadata(metadata);
       
        dataStore.put(URLS[i], page);
      }
      dataStore.flush();
      log.info("finished creating web page data");
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.