Package com.pardot.rhombus

Source Code of com.pardot.rhombus.TableScanner

package com.pardot.rhombus;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.pardot.rhombus.cobject.CObjectTokenVisitor;
import com.pardot.rhombus.cobject.CObjectTokenVisitorFactory;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.input.ReversedLinesFileReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.math.BigInteger;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
* User: Michael Frank
* Date: 6/30/14
*/
public class TableScanner {

  private static Logger logger = LoggerFactory.getLogger(TableScanner.class);

  public static final long minToken = Long.MIN_VALUE;
  public static final long maxToken = Long.MAX_VALUE;
  private long batchSize = 200;
  private int statementRetries = 5;

  private ObjectMapper objectMapper;
  private String objectType;
  private final int numPartitions;
  CObjectTokenVisitorFactory visitorFactory;
  private ExecutorService executorService;
  private File savepointDirectory;
  private PrintWriter[] savepointWriters = null;
  private final CountDownLatch shutdownLatch;

  public TableScanner(ObjectMapper objectMapper, String objectType, int numPartitions, CObjectTokenVisitorFactory visitorFactory, String savepointDirectoryPath) {
    Preconditions.checkNotNull(objectMapper);
    Preconditions.checkNotNull(objectType);
    Preconditions.checkNotNull(numPartitions);
    Preconditions.checkNotNull(visitorFactory);
    this.objectMapper = objectMapper;
    this.objectType = objectType;
    this.numPartitions = numPartitions;
    this.visitorFactory = visitorFactory;
    executorService = Executors.newFixedThreadPool(numPartitions);
    shutdownLatch = new CountDownLatch(numPartitions);
    if (savepointDirectoryPath != null) {
      this.savepointDirectory = new File(savepointDirectoryPath);
      this.savepointDirectory.mkdir();
    }
  }

  public void scan() {
    List<Map.Entry<Long, Long>> ranges = makeRanges();
    if (this.savepointDirectory != null) {
      this.savepointWriters = this.makeSavepointWriters(this.numPartitions);
    }

    int rangeNum = 0;
    for(final Map.Entry<Long, Long> range : ranges) {
      final Integer partitionId = rangeNum;
      rangeNum++;
      final CObjectTokenVisitor visitor = visitorFactory.getInstance("" + rangeNum);
      Runnable r = new Runnable() {
        @Override
        public void run() {
          handle(range.getKey(), range.getValue(), visitor, partitionId);
        }
      };
      executorService.execute(r);
    }
    try {
      shutdownLatch.await();
    } catch (InterruptedException e) {
      e.printStackTrace();
    } finally {
      if (this.savepointWriters != null) {
        for(int i = 0; i < numPartitions; i++) {
          this.savepointWriters[i].close();
        }
      }
    }
  }

  public List<Map.Entry<Long, Long>> makeRanges() {
    Long[] savedStartTokens = this.readSavepoints();
    List<Map.Entry<Long, Long>> ranges = Lists.newArrayList();
    BigInteger fullRange = BigInteger.valueOf(maxToken).subtract(BigInteger.valueOf(minToken)).add(BigInteger.ONE);
    BigInteger rangeLength = fullRange.divide(BigInteger.valueOf(numPartitions));
    BigInteger rangeStart = BigInteger.valueOf(minToken);
    for(int i = 0 ; i < numPartitions - 1 ; i++) {
      BigInteger rangeEnd = rangeStart.add(rangeLength).subtract(BigInteger.ONE);
      // If we have a savepoint for this partition, use that start token instead of the default
      if (savedStartTokens != null) {
        rangeStart = BigInteger.valueOf(savedStartTokens[i]);
      }
      AbstractMap.SimpleEntry<Long, Long> range = new AbstractMap.SimpleEntry<Long, Long>(rangeStart.longValue(), rangeEnd.longValue());
      ranges.add(range);
      rangeStart = rangeEnd.add(BigInteger.ONE);
    }
    BigInteger rangeEnd = BigInteger.valueOf(maxToken);
    if (savedStartTokens != null) {
      rangeStart = BigInteger.valueOf(savedStartTokens[numPartitions - 1]);
    }
    AbstractMap.SimpleEntry<Long, Long> range = new AbstractMap.SimpleEntry<Long, Long>(rangeStart.longValue(), rangeEnd.longValue());
    ranges.add(range);
    return ranges;
  }

  private Long[] readSavepoints() {
    Long[] out = null;
    if (this.savepointDirectory != null) {
      File[] existingSavepoints = this.savepointDirectory.listFiles();
      // Ensure we have the right number of existing savepoint files
      if (existingSavepoints == null) {
        logger.info("No savepoint files found in savepoint directory.");
      } else if (existingSavepoints.length != numPartitions) {
        logger.info("Found " + existingSavepoints.length + " files in savepoint directory, expected " + numPartitions);
      } else {
        // We have the right number of files, let's parse them
        Map<String, File> files = Maps.newHashMap();
        for (File savepoint : existingSavepoints) {
          files.put(savepoint.getName(), savepoint);
        }
        out = new Long[numPartitions];
        for(int i = 0; i < numPartitions; i++) {
          // If any of the savepoints are missing for our partitions, fail out from restoring from savepoints
          if (!files.keySet().contains(TableScanner.getSavepointFilename(i))) {
            return null;
          }
          File savepointFile = files.get(TableScanner.getSavepointFilename(i));
          UUID lastUuid;
          try {
            ReversedLinesFileReader reader = new ReversedLinesFileReader(savepointFile);
            try {
              lastUuid = UUID.fromString(reader.readLine());
            } catch (IllegalArgumentException e) {
              logger.error("Error parsing UUID from savepoint file " + TableScanner.getSavepointFilename(i), e);
              return null;
            }
            reader.close();
          } catch (IOException e) {
            logger.error("Error reading savepoint file", e);
            return null;
          }
          out[i] = this.objectMapper.getTokenForId(this.objectType, lastUuid);
        }
      }
    }
    logger.info("Read savepoints successfully");
    return out;
  }

  /**
   * @param partitionId 0-based partition id
   * @return Savepoint filename
   */
  public static String getSavepointFilename(Integer partitionId) {
    return "savepoint-" + partitionId.toString() + ".txt";
  }

  private PrintWriter[] makeSavepointWriters(Integer partitionCount) {
    PrintWriter[] out = new PrintWriter[partitionCount];
    try {
      this.readySafepointDirectory(partitionCount);
      for(int i = 0; i < partitionCount; i++) {
        // Build a writer for each partition thread
        out[i] = new PrintWriter(new FileOutputStream(this.savepointDirectory.getName() + "/" + TableScanner.getSavepointFilename(i), true));
      }
    } catch (IOException e) {
      return null;
    }
    return out;
  }

  private void readySafepointDirectory(Integer partitionCount) throws IOException {
    File[] fileArray = this.savepointDirectory.listFiles();
    // If there are already files in the savepoint directory...
    if (fileArray != null) {
      // Let's find out if they match our config or not (i.e. probably from the same command run previously)
      Map<String, File> files = Maps.newHashMap();
      for (File savepoint : fileArray) {
        files.put(savepoint.getName(), savepoint);
      }
      Set<String> requiredFilenames = Sets.newHashSet();
      for(int i = 0; i < partitionCount; i++) {
        requiredFilenames.add(TableScanner.getSavepointFilename(i));
      }

      // If they don't have the right filenames, clean the directory out
      if (!files.keySet().equals(requiredFilenames)) {
        logger.info("Existing savepoint files don't match what we're looking for, clearing them out and starting over.");
        FileUtils.cleanDirectory(this.savepointDirectory);
      }
      logger.info("Appending savepoints to existing files.");
      // Otherwise leave the files there, we can append to them
    }
  }

  public void handle(Long minToken, Long maxToken, CObjectTokenVisitor visitor, Integer partitionId) {
    visitor.setUp();
    List<Map<String, Object>> results = executeStatement(objectType, minToken, maxToken, batchSize);
    while(results != null && results.size() > 0) {
      visitResults(results, visitor);
      String minUuid = String.valueOf(results.get(results.size() - 1).get("id"));
      results = executeStatement(objectType, minUuid, maxToken, batchSize);
      if (this.savepointWriters != null) {
        // We've processed this range, so save our progress for later
        this.savepointWriters[partitionId].append(minUuid);
        this.savepointWriters[partitionId].append("\n");
      }
    }
    visitor.cleanUp();
    shutdownLatch.countDown();
  }

  private void visitResults(List<Map<String, Object>> results, CObjectTokenVisitor visitor) {
    for(Map<String, Object> result : results) {
      visitor.visit(result);
    }
  }

  private List<Map<String, Object>> executeStatement(String objectType, Object minToken, Long maxToken, long batchSize) {
    int retries = 0;
    while(retries < statementRetries) {
      try {
        List<Map<String, Object>> results;
        if(Long.class.isAssignableFrom(minToken.getClass())) {
          results = objectMapper.scanTableWithStartToken(objectType, (Long)minToken, maxToken, batchSize);
        } else {
          results = objectMapper.scanTableWithStartId(objectType, (String)minToken, maxToken, batchSize);
        }
        return results;
      } catch(Exception e) {
        logger.error("Exception number " + retries + " executing statement", e);
        retries++;
        if(retries >= statementRetries) {
          throw new RuntimeException(e);
        }
      }
    }
    throw new RuntimeException("Bad logic in statement retry");
  }

  public long getBatchSize() {
    return batchSize;
  }

  public void setBatchSize(int batchSize) {
    this.batchSize = batchSize;
  }

  public int getStatementRetries() {
    return statementRetries;
  }

  public void setStatementRetries(int statementRetries) {
    this.statementRetries = statementRetries;
  }
}
TOP

Related Classes of com.pardot.rhombus.TableScanner

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.