Package org.rabinfingerprint.handprint

Source Code of org.rabinfingerprint.handprint.FingerFactory

package org.rabinfingerprint.handprint;

import java.io.IOException;
import java.io.InputStream;

import org.rabinfingerprint.fingerprint.RabinFingerprintLong;
import org.rabinfingerprint.fingerprint.RabinFingerprintLongWindowed;
import org.rabinfingerprint.polynomial.Polynomial;

import com.google.common.io.ByteStreams;

public class FingerFactory {
  public static interface ChunkBoundaryDetector {
    public boolean isBoundary(RabinFingerprintLong fingerprint);
  }

  public static interface ChunkVisitor {
    public void visit(long fingerprint, long chunkStart, long chunkEnd);
  }
 
  private final RabinFingerprintLong finger;
  private final RabinFingerprintLongWindowed fingerWindow;
  private final ChunkBoundaryDetector boundaryDetector;

  public FingerFactory(Polynomial p, long bytesPerWindow, ChunkBoundaryDetector boundaryDetector) {
    this.finger = new RabinFingerprintLong(p);
    this.fingerWindow = new RabinFingerprintLongWindowed(p, bytesPerWindow);
    this.boundaryDetector = boundaryDetector;
  }

  private RabinFingerprintLong newFingerprint() {
    return new RabinFingerprintLong(finger);
  }

  private RabinFingerprintLongWindowed newWindowedFingerprint() {
    return new RabinFingerprintLongWindowed(fingerWindow);
  }

  /**
   * Fingerprint the file into chunks called "Fingers". The chunk boundaries
   * are determined using a windowed fingerprinter
   * {@link RabinFingerprintLongWindowed}.
   *
   * The chunk detector is position independent. Therefore, even if a file is
   * rearranged or partially corrupted, the untouched chunks can be
   * efficiently discovered.
   */
  public void getChunkFingerprints(InputStream is, ChunkVisitor visitor) throws IOException {
    // windowing fingerprinter for finding chunk boundaries. this is only
    // reset at the beginning of the file
    final RabinFingerprintLong window = newWindowedFingerprint();

    // fingerprinter for chunks. this is reset after each chunk
    final RabinFingerprintLong finger = newFingerprint();

    // counters
    long chunkStart = 0;
    long chunkEnd = 0;

    /*
     * fingerprint one byte at a time. we have to use this granularity to
     * ensure that, for example, a one byte offset at the beginning of the
     * file won't effect the chunk boundaries
     */
    for (byte b : ByteStreams.toByteArray(is)) {
      // push byte into fingerprints
      window.pushByte(b);
      finger.pushByte(b);
      chunkEnd++;

      /*
       * if we've reached a boundary (which we will at some probability
       * based on the boundary pattern and the size of the fingerprint
       * window), we store the current chunk fingerprint and reset the
       * chunk fingerprinter.
       */
      if (boundaryDetector.isBoundary(window)) {
        visitor.visit(finger.getFingerprintLong(), chunkStart, chunkEnd);
        finger.reset();
       
        // store last chunk offset
        chunkStart = chunkEnd;
      }
    }

    // final chunk
    visitor.visit(finger.getFingerprintLong(), chunkStart, chunkEnd);
  }

  /**
   * Rapidly fingerprint an entire stream's contents.
   */
  public long getFullFingerprint(InputStream is) throws IOException {
    final RabinFingerprintLong finger = newFingerprint();
    finger.pushBytes(ByteStreams.toByteArray(is));
    return finger.getFingerprintLong();
  }
}
TOP

Related Classes of org.rabinfingerprint.handprint.FingerFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.