/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.flume.sink.hdfs;

import java.io.File;
import java.io.IOException;
import java.util.Calendar;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.flume.Clock;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Charsets;

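/**
 * Unit tests for {@link BucketWriter}: event-count, size and time based
 * roll triggers, file prefix/suffix handling, close callbacks, and
 * close-retry behavior.
 */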
public class TestBucketWriter {

  private static final Logger logger =
      LoggerFactory.getLogger(TestBucketWriter.class);
  private Context ctx = new Context();

  private static ScheduledExecutorService timedRollerPool;

  @BeforeClass
  public static void setup() {
    timedRollerPool = Executors.newSingleThreadScheduledExecutor();
  }

  @AfterClass
  public static void teardown() throws InterruptedException {
    timedRollerPool.shutdown();
    timedRollerPool.awaitTermination(2, TimeUnit.SECONDS);
    timedRollerPool.shutdownNow();
  }

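  /**
   * Count-based roll. The first four BucketWriter constructor arguments
   * appear to be rollInterval, rollSize, rollCount and batchSize (inferred
   * from how the tests in this class vary them); 0 disables a trigger.
   * Here only rollCount is set, so 1000 three-byte events should produce
   * 1000 / 100 = 10 files.
   */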
  @Test
  public void testEventCountingRoller() throws IOException, InterruptedException {
    int maxEvents = 100;
    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(0, 0, maxEvents, 0, ctx,
        "/tmp", "file", "", ".tmp", null, null, SequenceFile.CompressionType.NONE,
        hdfsWriter, timedRollerPool, null,
        new SinkCounter("test-bucket-writer-" + System.currentTimeMillis()), 0,
        null, null, 30000, Executors.newSingleThreadExecutor(), 0, 0);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    for (int i = 0; i < 1000; i++) {
      bucketWriter.append(e);
    }

    logger.info("Number of events written: {}", hdfsWriter.getEventsWritten());
    logger.info("Number of bytes written: {}", hdfsWriter.getBytesWritten());
    logger.info("Number of files opened: {}", hdfsWriter.getFilesOpened());

    Assert.assertEquals("events written", 1000, hdfsWriter.getEventsWritten());
    Assert.assertEquals("bytes written", 3000, hdfsWriter.getBytesWritten());
    Assert.assertEquals("files opened", 10, hdfsWriter.getFilesOpened());
  }

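  /**
   * Size-based roll: with rollSize = 300 bytes and 3-byte events, each file
   * holds 100 events, so 1000 events should again open 10 files.
   */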
  @Test
  public void testSizeRoller() throws IOException, InterruptedException {
    int maxBytes = 300;
    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(0, maxBytes, 0, 0,
      ctx, "/tmp", "file", "", ".tmp", null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter, timedRollerPool,
      null, new SinkCounter("test-bucket-writer-" +
      System.currentTimeMillis()), 0, null, null, 30000,
      Executors.newSingleThreadExecutor(), 0, 0);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    for (int i = 0; i < 1000; i++) {
      bucketWriter.append(e);
    }

    logger.info("Number of events written: {}", hdfsWriter.getEventsWritten());
    logger.info("Number of bytes written: {}", hdfsWriter.getBytesWritten());
    logger.info("Number of files opened: {}", hdfsWriter.getFilesOpened());

    Assert.assertEquals("events written", 1000, hdfsWriter.getEventsWritten());
    Assert.assertEquals("bytes written", 3000, hdfsWriter.getBytesWritten());
    Assert.assertEquals("files opened", 10, hdfsWriter.getFilesOpened());
  }

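  /**
   * Time-based roll: with a 1-second rollInterval, sleeping for twice the
   * interval should close the file and fire the WriterCallback. A second
   * writer then reopens a file so a later timed roll can be observed too.
   */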
  @Test
  public void testIntervalRoller() throws IOException, InterruptedException {
    final int ROLL_INTERVAL = 1; // seconds
    final int NUM_EVENTS = 10;
    final AtomicBoolean calledBack = new AtomicBoolean(false);

    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(ROLL_INTERVAL, 0, 0, 0, ctx,
      "/tmp", "file", "", ".tmp", null, null, SequenceFile.CompressionType.NONE,
      hdfsWriter, timedRollerPool, null,
      new SinkCounter("test-bucket-writer-" + System.currentTimeMillis()),
      0, new HDFSEventSink.WriterCallback() {
        @Override
        public void run(String filePath) {
          calledBack.set(true);
        }
      }, null, 30000, Executors.newSingleThreadExecutor(), 0, 0);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    long startNanos = System.nanoTime();
    for (int i = 0; i < NUM_EVENTS - 1; i++) {
      bucketWriter.append(e);
    }

    // sleep to force a roll... wait 2x interval just to be sure
    Thread.sleep(2 * ROLL_INTERVAL * 1000L);

    Assert.assertTrue(bucketWriter.closed);
    Assert.assertTrue(calledBack.get());

    bucketWriter = new BucketWriter(ROLL_INTERVAL, 0, 0, 0, ctx,
      "/tmp", "file", "", ".tmp", null, null, SequenceFile.CompressionType.NONE,
      hdfsWriter, timedRollerPool, null,
      new SinkCounter("test-bucket-writer-"
        + System.currentTimeMillis()), 0, null, null, 30000,
      Executors.newSingleThreadExecutor(), 0, 0);
    // write one more event (to reopen a new file so we will roll again later)
    bucketWriter.append(e);

    long elapsedMillis = TimeUnit.MILLISECONDS.convert(
        System.nanoTime() - startNanos, TimeUnit.NANOSECONDS);
    long elapsedSeconds = elapsedMillis / 1000L;

    logger.info("Time elapsed: {} milliseconds", elapsedMillis);
    logger.info("Number of events written: {}", hdfsWriter.getEventsWritten());
    logger.info("Number of bytes written: {}", hdfsWriter.getBytesWritten());
    logger.info("Number of files opened: {}", hdfsWriter.getFilesOpened());
    logger.info("Number of files closed: {}", hdfsWriter.getFilesClosed());

    Assert.assertEquals("events written", NUM_EVENTS,
        hdfsWriter.getEventsWritten());
    Assert.assertEquals("bytes written", e.getBody().length * NUM_EVENTS,
        hdfsWriter.getBytesWritten());
    Assert.assertEquals("files opened", 2, hdfsWriter.getFilesOpened());

    // before auto-roll
    Assert.assertEquals("files closed", 1, hdfsWriter.getFilesClosed());

    logger.info("Waiting for roll...");
    Thread.sleep(2 * ROLL_INTERVAL * 1000L);

    logger.info("Number of files closed: {}", hdfsWriter.getFilesClosed());
    Assert.assertEquals("files closed", 2, hdfsWriter.getFilesClosed());
  }

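  /**
   * Regression test: after a time-based roll closes the file, flush() must
   * not fail with "closed". The stubbed HDFSWriter below throws from sync()
   * when closed and transparently reopens on append().
   */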
  @Test
  public void testIntervalRollerBug() throws IOException, InterruptedException {
    final int ROLL_INTERVAL = 1; // seconds
    final int NUM_EVENTS = 10;

    HDFSWriter hdfsWriter = new HDFSWriter() {
      private volatile boolean open = false;

      public void configure(Context context) {

      }

      public void sync() throws IOException {
        if (!open) {
          throw new IOException("closed");
        }
      }

      public void open(String filePath, CompressionCodec codec,
          CompressionType cType) throws IOException {
        open = true;
      }

      public void open(String filePath) throws IOException {
        open = true;
      }

      public void close() throws IOException {
        open = false;
      }

      @Override
      public boolean isUnderReplicated() {
        return false;
      }

      public void append(Event e) throws IOException {
        // we just re-open in append if closed
        open = true;
      }
    };
    File tmpFile = File.createTempFile("flume", "test");
    tmpFile.deleteOnExit();
    String path = tmpFile.getParent();
    String name = tmpFile.getName();

    BucketWriter bucketWriter = new BucketWriter(ROLL_INTERVAL, 0, 0,
      0, ctx, path, name, "", ".tmp", null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter,
      timedRollerPool, null, new SinkCounter("test-bucket-writer-"
      + System.currentTimeMillis()),
      0, null, null, 30000, Executors.newSingleThreadExecutor(),
      0, 0);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    for (int i = 0; i < NUM_EVENTS - 1; i++) {
      bucketWriter.append(e);
    }

    // sleep to force a roll... wait 2x interval just to be sure
    Thread.sleep(2 * ROLL_INTERVAL * 1000L);

    // would throw IOException("closed") if the roll-induced close were not handled
    bucketWriter.flush();
  }

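  /**
   * With no file suffix configured, the in-use file should end in just
   * ".tmp". A fixed Clock pins the timestamp; the expected name ends in
   * testTime + 1, apparently because BucketWriter seeds its file counter
   * from the clock and increments it once on open (inferred from the
   * assertion below).
   */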
  @Test
  public void testFileSuffixNotGiven() throws IOException, InterruptedException {
    final int ROLL_INTERVAL = 1000; // seconds; long enough that no roll occurs during the test
    final String suffix = null;

    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(ROLL_INTERVAL, 0,
      0, 0, ctx, "/tmp", "file", "", ".tmp", suffix, null,
      SequenceFile.CompressionType.NONE, hdfsWriter,
      timedRollerPool, null, new SinkCounter("test-bucket-writer-"
      + System.currentTimeMillis()), 0, null, null, 30000,
      Executors.newSingleThreadExecutor(), 0, 0);

    // Need to override the system time used by the writer so we know what to expect
    final long testTime = System.currentTimeMillis();
    Clock testClock = new Clock() {
      public long currentTimeMillis() {
        return testTime;
      }
    };
    bucketWriter.setClock(testClock);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    bucketWriter.append(e);

    Assert.assertTrue("Incorrect suffix", hdfsWriter.getOpenedFilePath()
        .endsWith(Long.toString(testTime + 1) + ".tmp"));
  }

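  /**
   * With a ".avro" suffix configured, the in-use file should end in
   * "&lt;counter&gt;.avro.tmp", i.e. the suffix goes before the in-use ".tmp".
   */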
  @Test
  public void testFileSuffixGiven() throws IOException, InterruptedException {
    final int ROLL_INTERVAL = 1000; // seconds; long enough that no roll occurs during the test
    final String suffix = ".avro";

    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(ROLL_INTERVAL, 0,
      0, 0, ctx, "/tmp", "file", "", ".tmp", suffix, null,
      SequenceFile.CompressionType.NONE, hdfsWriter,
      timedRollerPool, null, new SinkCounter(
      "test-bucket-writer-" + System.currentTimeMillis()), 0,
      null, null, 30000, Executors.newSingleThreadExecutor(), 0, 0);

    // Need to override the system time used by the writer so we know what to expect
    final long testTime = System.currentTimeMillis();
    Clock testClock = new Clock() {
      public long currentTimeMillis() {
        return testTime;
      }
    };
    bucketWriter.setClock(testClock);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    bucketWriter.append(e);

    Assert.assertTrue("Incorrect suffix", hdfsWriter.getOpenedFilePath().endsWith(
        Long.toString(testTime + 1) + suffix + ".tmp"));
  }

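  /**
   * An explicitly configured suffix should still be used when a compression
   * codec (gzip, block-compressed SequenceFile) is in play, rather than any
   * default extension derived from the codec.
   */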
  @Test
  public void testFileSuffixCompressed()
      throws IOException, InterruptedException {
    final int ROLL_INTERVAL = 1000; // seconds; long enough that no roll occurs during the test
    final String suffix = ".foo";

    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(ROLL_INTERVAL, 0, 0,
      0, ctx, "/tmp", "file", "", ".tmp", suffix,
      HDFSEventSink.getCodec("gzip"),
      SequenceFile.CompressionType.BLOCK, hdfsWriter,
      timedRollerPool, null, new SinkCounter("test-bucket-writer-"
      + System.currentTimeMillis()), 0, null, null, 30000,
      Executors.newSingleThreadExecutor(), 0, 0
    );

    // Need to override the system time used by the writer so we know what to expect
    final long testTime = System.currentTimeMillis();

    Clock testClock = new Clock() {
      public long currentTimeMillis() {
        return testTime;
      }
    };
    bucketWriter.setClock(testClock);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    bucketWriter.append(e);

    Assert.assertTrue("Incorrect suffix",hdfsWriter.getOpenedFilePath()
        .endsWith(Long.toString(testTime+1) + suffix + ".tmp"));
  }

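  /**
   * The in-use prefix ("BRNO_IS_CITY_IN_CZECH_REPUBLIC" here) should appear
   * in the path of the file while it is open for writing.
   */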
  @Test
  public void testInUsePrefix() throws IOException, InterruptedException {
    final int ROLL_INTERVAL = 1000; // seconds; long enough that no roll occurs during the test
    final String PREFIX = "BRNO_IS_CITY_IN_CZECH_REPUBLIC";

    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(ROLL_INTERVAL, 0, 0,
      0, ctx, "/tmp", "file", PREFIX, ".tmp", null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter,
      timedRollerPool, null, new SinkCounter(
        "test-bucket-writer-" + System.currentTimeMillis()), 0,
      null, null, 30000, Executors.newSingleThreadExecutor(), 0, 0);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    bucketWriter.append(e);

    Assert.assertTrue("Incorrect in use prefix", hdfsWriter.getOpenedFilePath().contains(PREFIX));
  }

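  /**
   * Same check for the in-use suffix: it should appear in the path of the
   * file while it is open for writing.
   */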
  @Test
  public void testInUseSuffix() throws IOException, InterruptedException {
    final int ROLL_INTERVAL = 1000; // seconds; long enough that no roll occurs during the test
    final String SUFFIX = "WELCOME_TO_THE_HELLMOUNTH";

    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(ROLL_INTERVAL, 0, 0,
      0, ctx, "/tmp", "file", "", SUFFIX, null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter,
      timedRollerPool, null, new SinkCounter(
        "test-bucket-writer-" + System.currentTimeMillis()), 0,
      null, null, 30000, Executors.newSingleThreadExecutor(), 0, 0);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    bucketWriter.append(e);

    Assert.assertTrue("Incorrect in use suffix", hdfsWriter.getOpenedFilePath().contains(SUFFIX));
  }

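  /**
   * An explicit close(true) should invoke the WriterCallback, not just the
   * timed roll path exercised in testIntervalRoller.
   */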
  @Test
  public void testCallbackOnClose() throws IOException, InterruptedException {
    final int ROLL_INTERVAL = 1000; // seconds; long enough that no roll occurs during the test
    final String SUFFIX = "WELCOME_TO_THE_EREBOR";
    final AtomicBoolean callbackCalled = new AtomicBoolean(false);

    MockHDFSWriter hdfsWriter = new MockHDFSWriter();
    BucketWriter bucketWriter = new BucketWriter(ROLL_INTERVAL, 0, 0,
      0, ctx, "/tmp", "file", "", SUFFIX, null, null,
      SequenceFile.CompressionType.NONE,
      hdfsWriter, timedRollerPool, null,
      new SinkCounter(
        "test-bucket-writer-" + System.currentTimeMillis()), 0,
      new HDFSEventSink.WriterCallback() {
        @Override
        public void run(String filePath) {
          callbackCalled.set(true);
        }
      }, "blah", 30000, Executors.newSingleThreadExecutor(), 0, 0);

    Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
    bucketWriter.append(e);
    bucketWriter.close(true);

    Assert.assertTrue(callbackCalled.get());
  }

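  /**
   * Close-retry behavior: MockFileSystem is constructed so that (presumably)
   * a close only succeeds after numberOfClosesRequired attempts; with
   * hdfs.retryInterval = 1 second, after sleeping numberOfClosesRequired + 2
   * seconds the writer's closeTries counter should match that number.
   */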
  @Test
  public void testSequenceFileCloseRetries() throws Exception {
    sequenceFileCloseRetryCoreTest(1);
    sequenceFileCloseRetryCoreTest(5);
    sequenceFileCloseRetryCoreTest(2);
  }

  private void sequenceFileCloseRetryCoreTest(int numberOfClosesRequired)
      throws Exception {
    String hdfsPath = "file:///tmp/flume-test."
      + Calendar.getInstance().getTimeInMillis() + "."
      + Thread.currentThread().getId();

    Context context = new Context();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path(hdfsPath);
    fs.delete(dirPath, true);
    fs.mkdirs(dirPath);
    context.put("hdfs.path", hdfsPath);
    context.put("hdfs.closeTries",
      String.valueOf(numberOfClosesRequired));
    context.put("hdfs.rollCount", "1");
    context.put("hdfs.retryInterval", "1");
    context.put("hdfs.callTimeout", Long.toString(1000));
    MockFileSystem mockFs = new MockFileSystem(fs, numberOfClosesRequired);
    BucketWriter bucketWriter = new BucketWriter(0, 0, 1, 1, ctx,
      hdfsPath, hdfsPath, "singleBucket", ".tmp", null, null,
      null, new MockDataStream(mockFs),
      timedRollerPool, null,
      new SinkCounter(
        "test-bucket-writer-" + System.currentTimeMillis()),
      0, null, null, 30000, Executors.newSingleThreadExecutor(), 1,
      numberOfClosesRequired);

    bucketWriter.setFileSystem(mockFs);
    // At this point we have already checked whether isFileClosed is
    // available in this JVM, so let's make it check again.
    Event event = EventBuilder.withBody("test", Charsets.UTF_8);
    bucketWriter.append(event);
    // The close is only triggered by the next append, so a second append is required.
    bucketWriter.append(event);

    TimeUnit.SECONDS.sleep(numberOfClosesRequired + 2);

    Assert.assertEquals("close tries", numberOfClosesRequired,
        bucketWriter.closeTries.get());
  }
}