Package fr.eolya.crawler.queue

Examples of fr.eolya.crawler.queue.ISourceItemsQueue


    // Fragment: obtains the items queue for a web source, puts it in the
    // requested state (reset / start / restart / rescan) and then seeds it
    // with the source's starting URLs.
    SourceWeb src = (SourceWeb) source;

    // Without starting URLs there is nothing to crawl: no queue is returned.
    StartingUrls startingUrls = src.getStartingUrls();
    if (startingUrls==null) return null;

    ISourceItemsQueue sourceQueue = QueueFactory.getSourceItemsQueueInstance(type, src.getId(), con, dbName, dbCollName);
    if (sourceQueue==null) return null;
   
    // Both the "reset" and the "clear" flags reset the queue.
    if (src.isReset() || src.isClear())
      sourceQueue.reset();

    // In "clear" mode the freshly reset queue is returned as is, without
    // being started or seeded.
    if (src.isClear()) return sourceQueue;

    // TODO : v4 - Useful ???
    // Non-empty queue and no rescan requested: start() the queue.
    if (sourceQueue.getQueueSize()>0 && !src.isRescan() && !src.isRescanFromCache())
      sourceQueue.start();
    else {
      if (!src.isRescan() && !src.isRescanFromCache()) {
        // Empty queue, no rescan: restart from depth 0, or from the queue's
        // current maximum depth when the source is flagged "deeper".
        int startDepth = 0;
        if (src.isDeeper()) {
          startDepth = sourceQueue.getCurrentMaxDepth();
        }
        sourceQueue.reStart(startDepth);
      } else {
        // A rescan (regular or from cache) was requested.
        sourceQueue.reScan();
      }

    }
    // Useful ???

    // Starting URLs are only pushed for a plain crawl (no rescan, no "deeper").
    if (!src.isRescan() && !src.isRescanFromCache() && !src.isDeeper()) {
      // Add or update starting urls
      // First pass: detect whether at least one starting URL uses mode "s"
      // (presumably "site" mode, going by the flag name - TODO confirm).
      boolean haveSiteMode = false;
      for (int i=0; i<startingUrls.size(); i++) {
        if ("s".equals(startingUrls.get(i).mode)) haveSiteMode = true;
      }
      for (int i=0; i<startingUrls.size(); i++) {
        // URLs flagged onlyFirstCrawl are skipped once the first crawl is completed.
        if (!startingUrls.get(i).onlyFirstCrawl || !src.isFirstCrawlCompleted()) {
          // While the first crawl is not completed and a mode "s" URL exists,
          // only the mode "s" URLs are pushed.
          if (haveSiteMode && !src.isFirstCrawlCompleted() && !"s".equals(startingUrls.get(i).mode)) continue;
          try {
            sourceQueue.push(startingUrls.get(i).getMap(src.getId()));
          } catch (Exception e) {
            // A failed push is only printed; the remaining URLs are still processed.
            e.printStackTrace();
          }
        }
      }
View Full Code Here


 
  @Test
  public void testMongoDBWebSiteUrlFifoQueue() {
    try {
      IDBConnection dbConnection = DBConnectionFactory.getDBConnectionInstance("mongodb", "localhost", 27017"", "");
      ISourceItemsQueue queue = QueueFactory.getSourceItemsQueueInstance("mongodb", 1, dbConnection, "testFifoQueue", "TestMongoDBWebSiteUrlFifoQueue");
     
      if (queue!=null) {
        queue.reset();
        queue.start();
        assertEquals(0, queue.size());
       
        assertTrue(queue.push(getItemWeb(1, "http://a.a.a/", 0).getMap()));
        assertTrue(queue.push(getItemWeb(1, "http://a.a.a/2.html", 1).getMap()));
        assertTrue(queue.push(getItemWeb(1, "http://a.a.a/3.html", 2).getMap()));

        assertTrue(queue.contains("http://a.a.a/"));
        assertEquals(3, queue.size());

        assertEquals(0, queue.getDoneQueueSize());
        assertFalse(queue.isDone("http://a.a.a/"));
       
        assertEquals("http://a.a.a/", queue.pop().get("url"));
        assertEquals("http://a.a.a/2.html", queue.pop().get("url"));
        assertEquals("http://a.a.a/3.html", queue.pop().get("url"));

        assertEquals(3, queue.getDoneQueueSize());
        assertTrue(queue.isDone("http://a.a.a/"));

        assertFalse(queue.contains("http://a.a.a/"));
        assertEquals(0, queue.size());

        assertFalse(queue.push(getItemWeb(1, "http://a.a.a/", 0).getMap()));
        assertTrue(queue.push(getItemWeb(1, "http://a.a.a/4.html", 2).getMap()));
        assertTrue(queue.push(getItemWeb(1, "http://a.a.a/3.html", 1).getMap()));
        assertFalse(queue.push(getItemWeb(1, "http://a.a.a/2.html", 2).getMap()));
       
        assertEquals(2, queue.size());

        assertEquals("http://a.a.a/3.html", queue.pop("depth").get("url"));
        assertEquals("http://a.a.a/4.html", queue.pop("depth").get("url"));

        assertEquals(0, queue.size());   
      }
     
      queue = QueueFactory.getSourceItemsQueueInstance("mongodb", 1, dbConnection, "testFifoQueue", "TestMongoDBWebSiteUrlFifoQueue")
      if (queue!=null) {
        queue.start();
        assertEquals(0, queue.size());
        assertFalse(queue.push(getItemWeb(1, "http://a.a.a/", 0).getMap()));
        queue.stop();
      }

      queue = QueueFactory.getSourceItemsQueueInstance("mongodb", 1, dbConnection, "testFifoQueue", "TestMongoDBWebSiteUrlFifoQueue")
      if (queue!=null) {
        queue.start();
        assertEquals(0, queue.size());
        assertTrue(queue.push(getItemWeb(1, "http://a.a.a/", 0).getMap()));
      }
    }
    catch (Exception e) {
      e.printStackTrace();
    }
View Full Code Here

  }
 
  public void testMongoDBWebSiteUrlFifoQueue2() {
    try {
      IDBConnection dbConnection = DBConnectionFactory.getDBConnectionInstance("mongodb", "localhost", 27017"", "");
      ISourceItemsQueue queue = QueueFactory.getSourceItemsQueueInstance("mongodb", 1, dbConnection, "testFifoQueue", "TestMongoDBWebSiteUrlFifoQueue");
     
      if (queue!=null) {
        queue.reset();
        queue.start();
        assertEquals(0, queue.size());
        assertTrue(queue.push(getItemWeb(1, "http://a.a.a/", 0).getMap()));
      }
    }
    catch (Exception e) {
      e.printStackTrace();
    }
View Full Code Here

TOP

Related Classes of fr.eolya.crawler.queue.ISourceItemsQueue

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle, Inc. Contact coftware#gmail.com.