Package org.apache.nutch.crawl

Examples of org.apache.nutch.crawl.MapWritable


    // It's not a tragedy.
    add(urlStr, doc, "encoding", parse.getData().getMeta(ENCODING_KEY),
      false, true, true, false, false);

    // Get metadata.
    MapWritable mw = datum.getMetaData();
    ParseData pd = parse.getData();

    // Add as stored, indexed, and untokenized but not lowercased.
    add(urlStr, doc, ARCCOLLECTION_KEY,
      getMetadataValue(ARCCOLLECTION_KEY, pd, mw),
View Full Code Here


    final long startTime = System.currentTimeMillis();
    final Content content = new Content(url, url, contentBytes, mimetype,
      metaData, getConf());
    datum.setFetchTime(Nutchwax.getDate(arcData.getDate()));

    MapWritable mw = datum.getMetaData();
   
    if (mw == null)
    {
      mw = new MapWritable();
    }
           
    if (collectionType.equals(Global.COLLECTION_TYPE_MULTIPLE)) {
      mw.put(new Text(ImportArcs.ARCCOLLECTION_KEY), new Text(SqlSearcher.getCollectionNameWithTimestamp(collectionName,arcData.getDate())));  
    }
    else {
      mw.put(new Text(ImportArcs.ARCCOLLECTION_KEY), new Text(collectionName));
    }   
    mw.put(new Text(ImportArcs.ARCFILENAME_KEY), new Text(arcName));
    mw.put(new Text(ImportArcs.ARCFILEOFFSET_KEY),
      new Text(Long.toString(arcData.getOffset())));
    datum.setMetaData(mw);
         
  TimeoutParsingThread tout=threadPool.getThread(Thread.currentThread().getId(),timeoutIndexingDocument)
  tout.setUrl(url);
View Full Code Here

        public void write(WritableComparable key, Writable value)
          throws IOException
        {                
          FetcherOutput fo = (FetcherOutput)value;
          MapWritable mw = fo.getCrawlDatum().getMetaData();
          Text cdxLine = (Text)mw.get(ImportArcs.CDXKEY);
         
          if (cdxLine != null)
          {
            cdxOut.append(key, cdxLine);
          }
         
          mw.remove(ImportArcs.CDXKEY);
          fetchOut.append(key, fo.getCrawlDatum());
         
          if (fo.getParse() != null)
          {
            parseOut.write(key, fo.getParse());        
View Full Code Here

  public void map(WritableComparable key, Writable value, OutputCollector output,
      Reporter reporter) throws IOException {
    newKey.set(key.toString());
    if (withMetadata) {
      CrawlDatum datum = (CrawlDatum)value;
      MapWritable meta = datum.getMetaData();
      if (meta.size() > 0) {
        MapWritable newMeta = new MapWritable();
        Iterator it = meta.keySet().iterator();
        while (it.hasNext()) {
          WritableComparable k = (WritableComparable)it.next();
          Writable v = meta.get(k);
          if (k instanceof UTF8) {
            Text t = new Text(k.toString());
            k = t;
          }
          newMeta.put(k, v);
        }
        datum.setMetaData(newMeta);
      }
    }
    output.collect(newKey, value);
View Full Code Here

public class TestMapWritable extends TestCase {

  private Configuration configuration = NutchConfiguration.create();

  public void testMap() throws Exception {
    MapWritable map = new MapWritable();
    assertTrue(map.isEmpty());
    for (int i = 0; i < 100; i++) {
      Text key = new Text("" + i);
      IntWritable value = new IntWritable(i);
      map.put(key, value);
      assertEquals(i + 1, map.size());
      assertTrue(map.containsKey(new Text("" + i)));
      assertTrue(map.containsValue(new IntWritable(i)));
      map.remove(key);
      assertEquals(i, map.size());
      map.put(key, value);
      assertEquals(value, map.get(key));
      assertFalse(map.isEmpty());
      assertTrue(map.keySet().contains(key));
      assertEquals(i + 1, map.values().size());
      assertTrue(map.values().contains(value));
    }
    testWritable(map);
    MapWritable map2 = new MapWritable();
    testWritable(map2);
    map2.putAll(map);
    assertEquals(100, map2.size());
    testWritable(map2);

    map.clear();
    assertTrue(map.isEmpty());
    assertEquals(0, map.size());
View Full Code Here

    assertFalse(map.containsKey(new Text("" + 1)));

  }

  public void testWritable() throws Exception {
    MapWritable datum1 = new MapWritable();
    for (int i = 0; i < 100; i++) {
      datum1.put(new LongWritable(i), new Text("" + 1));
    }
    assertEquals(100, datum1.size());
    testWritable(datum1);

    MapWritable datum2 = new MapWritable();
    for (int i = 0; i < 100; i++) {
      datum2.put(new DummyWritable(i), new DummyWritable(i));
    }
    assertEquals(100, datum2.size());
    testWritable(datum2);

    CrawlDatum c = new CrawlDatum(CrawlDatum.STATUS_DB_FETCHED, 1f);
    c.setMetaData(new MapWritable());
    for (int i = 0; i < 100; i++) {
      c.getMetaData().put(new LongWritable(i), new Text("" + 1));
    }
    testWritable(c);
  }
View Full Code Here

    }
    testWritable(c);
  }
 
  public void testEquals() {
    MapWritable map1 = new MapWritable();
    MapWritable map2 = new MapWritable();
    map1.put(new Text("key1"), new Text("val1"));
    map1.put(new Text("key2"), new Text("val2"));
    map2.put(new Text("key2"), new Text("val2"));
    map2.put(new Text("key1"), new Text("val1"));
    assertTrue(map1.equals(map2));
  }
View Full Code Here

        fs, configuration, file, IntWritable.class, MapWritable.class);
    // write map
    System.out.println("start writing map's");
    long start = System.currentTimeMillis();
    IntWritable key = new IntWritable();
    MapWritable map = new MapWritable();
    LongWritable mapValue = new LongWritable();
    for (int i = 0; i < 1000000; i++) {
      key.set(i);
      mapValue.set(i);
      map.put(key, mapValue);
      writer.append(key, map);
    }
    long needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing map's: " + needed);
View Full Code Here

  public void testRecycling() throws Exception {
    Text value = new Text("value");
    Text key1 = new Text("a");
    Text key2 = new Text("b");

    MapWritable writable = new MapWritable();
    writable.put(key1, value);
    assertEquals(writable.get(key1), value);
    assertNull(writable.get(key2));

    DataOutputBuffer dob = new DataOutputBuffer();
    writable.write(dob);
    writable.clear();
    writable.put(key1, value);
    writable.put(key2, value);
    assertEquals(writable.get(key1), value);
    assertEquals(writable.get(key2), value);

    DataInputBuffer dib = new DataInputBuffer();
    dib.reset(dob.getData(), dob.getLength());
    writable.readFields(dib);
    assertEquals(writable.get(key1), value);
    assertNull(writable.get(key2));
  }
View Full Code Here

public class TestMapWritable extends TestCase {

  private Configuration configuration = NutchConfiguration.create();

  public void testMap() throws Exception {
    MapWritable map = new MapWritable();
    assertTrue(map.isEmpty());
    for (int i = 0; i < 100; i++) {
      UTF8 key = new UTF8("" + i);
      IntWritable value = new IntWritable(i);
      map.put(key, value);
      assertEquals(i + 1, map.size());
      assertTrue(map.containsKey(new UTF8("" + i)));
      assertTrue(map.containsValue(new IntWritable(i)));
      map.remove(key);
      assertEquals(i, map.size());
      map.put(key, value);
      assertEquals(value, map.get(key));
      assertFalse(map.isEmpty());
      assertTrue(map.keySet().contains(key));
      assertEquals(i + 1, map.values().size());
      assertTrue(map.values().contains(value));
    }
    testWritable(map);
    MapWritable map2 = new MapWritable();
    testWritable(map2);
    map2.putAll(map);
    assertEquals(100, map2.size());
    testWritable(map2);

    map.clear();
    assertTrue(map.isEmpty());
    assertEquals(0, map.size());
View Full Code Here

TOP

Related Classes of org.apache.nutch.crawl.MapWritable

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.