Package v7db.files.mongodb

Source Code of v7db.files.mongodb.MongoContentStorage

/**
* Copyright (c) 2012, Thilo Planz. All rights reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package v7db.files.mongodb;

import static v7db.files.mongodb.QueryUtils._ID;

import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.bson.BSONObject;

import v7db.files.Compression;
import v7db.files.MapUtils;
import v7db.files.ZipFile;
import v7db.files.spi.Content;
import v7db.files.spi.ContentConcatenation;
import v7db.files.spi.ContentPointer;
import v7db.files.spi.ContentSHA;
import v7db.files.spi.ContentStorage;
import v7db.files.spi.GzippedContent;
import v7db.files.spi.InlineContent;
import v7db.files.spi.OffsetAndLength;
import v7db.files.spi.StorageScheme;
import v7db.files.spi.StoredContent;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.mongodb.gridfs.GridFS;

/**
* ContentStorage implementation that uses MongoDB documents.
*
* <ul>
* <li>The <code>_id</code> field is the content SHA-1 digest (20 bytes of
* binary data)
* <li>"Small" content (that does not need to be chunked) is stored in the
* binary field <code>in</code>.
* <li>If the data can be compressed using gzip, it will be stored in compressed
* form as <code>zin</code>. This mode is indicated by setting the value
* <code>gz</code> for the <code>store</code> field. The uncompressed length is
* given in the <code>length</code> field.
* <li>"Large" content is stored as the concatenation of chunks stored
* out-of-band (in other documents). For very large documents this can also
* become nested.
* <li>Other types of "out-of-band" storage schemes are possible and can be
* provided by extension code.
* </ul>
*
* @see <a href="https://github.com/thiloplanz/v7files/wiki/StorageFormat">v7files wiki: StorageFormat</a>
*
*
*/

public class MongoContentStorage implements ContentStorage {

  private static final int chunkSize = GridFS.DEFAULT_CHUNKSIZE;

  private final DBCollection contentCollection;

  private final Map<String, StorageScheme> storageSchemes = new HashMap<String, StorageScheme>();

  public final static String DEFAULT_CONTENT_COLLECTION_NAME = "v7files.content";

  public MongoContentStorage(DB db) {
    this(db.getCollection(DEFAULT_CONTENT_COLLECTION_NAME));
  }

  public MongoContentStorage(DBCollection contentCollection) {
    this.contentCollection = contentCollection;
    storageSchemes.put("gz", new GzippedContent());
    storageSchemes.put("cat", new ContentConcatenation());
    storageSchemes.put("zip", new ZipFile.ContentFromZipFile());
  }

  public Content getContent(byte[] sha) throws IOException {
    return getContent(contentCollection.findOne(sha));
  }

  public ContentSHA findContentPointerByPrefix(byte[] shaPrefix)
      throws IOException {
    if (shaPrefix.length == 20) {
      DBObject file = contentCollection.findOne(shaPrefix);
      if (file == null)
        return null;
      Content c = getContent(file);
      return ContentSHA.forDigestAndLength(shaPrefix, c.getLength());
    }

    if (shaPrefix.length > 20)
      throw new IllegalArgumentException();

    byte[] lower = Arrays.copyOf(shaPrefix, 20); // 0-padded
    byte[] higher = Arrays.copyOf(shaPrefix, 20); // FF-padded
    for (int i = shaPrefix.length; i < higher.length; i++) {
      higher[i] = (byte) 0xFF;
    }
    List<DBObject> files = contentCollection.find(
        QueryUtils.between(_ID, lower, higher), new BasicDBObject())
        .limit(2).toArray();
    if (files.isEmpty())
      return null;
    if (files.size() == 1) {
      Content c = getContent(files.get(0));
      return ContentSHA.forDigestAndLength(
          (byte[]) files.get(0).get(_ID), c.getLength());
    }
    throw new IllegalArgumentException(Hex.encodeHexString(shaPrefix)
        + " is not a unique SHA prefix");
  }

  public Content findContentByPrefix(byte[] shaPrefix) throws IOException {
    if (shaPrefix.length == 20)
      return getContent(shaPrefix);
    if (shaPrefix.length > 20)
      throw new IllegalArgumentException();

    byte[] lower = Arrays.copyOf(shaPrefix, 20); // 0-padded
    byte[] higher = Arrays.copyOf(shaPrefix, 20); // FF-padded
    for (int i = shaPrefix.length; i < higher.length; i++) {
      higher[i] = (byte) 0xFF;
    }

    List<DBObject> files = contentCollection.find(
        QueryUtils.between(_ID, lower, higher), new BasicDBObject())
        .limit(2).toArray();
    if (files.isEmpty())
      return null;
    if (files.size() == 1)
      return getContent(files.get(0));
    throw new IllegalArgumentException(Hex.encodeHexString(shaPrefix)
        + " is not a unique SHA prefix");
  }

  public Content getContent(ContentPointer pointer) throws IOException {
    if (pointer == null)
      return null;
    if (pointer instanceof InlineContent)
      return (Content) pointer;

    if (pointer instanceof ContentSHA) {
      ContentSHA p = (ContentSHA) pointer;
      byte[] sha = p.getSHA();

      Content base = getContent(sha);
      if (base == null)
        throw new IllegalArgumentException("base SHA not found: "
            + Hex.encodeHexString(sha));

      return base;

    }

    if (pointer instanceof StoredContent) {
      StoredContent p = (StoredContent) pointer;
      byte[] sha = p.getBaseSHA();

      Content base = getContent(sha);
      if (base == null)
        throw new IllegalArgumentException("base SHA not found: "
            + Hex.encodeHexString(sha));

      if (p.getLength() != base.getLength()) {
        return new OffsetAndLength(base, 0, p.getLength());
      }

      return base;

    }
    throw new IllegalArgumentException(pointer.getClass().toString());

  }

  @SuppressWarnings("unchecked")
  private Content getContent(BSONObject data) throws IOException {
    if (data == null)
      return null;
    data.removeField("_id");
    String store = BSONUtils.getString(data, "store");
    if (store == null || "raw".equals(store)) {
      return InlineContent.deserialize(data.toMap());
    }
    StorageScheme s = storageSchemes.get(store);
    if (s != null)
      return s.getContent(this, data.toMap());
    throw new UnsupportedOperationException(store);
  }

  /**
   * read into the buffer, continuing until the stream is finished or the
   * buffer is full.
   *
   * @return the number of bytes read, which could be 0 (not -1)
   * @throws IOException
   */
  private static int readFully(InputStream data, byte[] buffer)
      throws IOException {
    int read = data.read(buffer);
    if (read == -1) {
      return 0;
    }
    while (read < buffer.length) {
      int added = data.read(buffer, read, buffer.length - read);
      if (added == -1)
        return read;
      read += added;
    }
    return read;
  }

  public ContentSHA storeContent(InputStream data) throws IOException {
    try {
      MessageDigest completeSHA = MessageDigest.getInstance("SHA");
      long completeLength = 0;
      byte[] chunk = new byte[chunkSize];
      int read;
      List<ContentSHA> chunks = new ArrayList<ContentSHA>();

      while (0 < (read = readFully(data, chunk))) {
        completeSHA.update(chunk, 0, read);
        completeLength += read;
        chunks.add(storeContentChunk(chunk, 0, read));
      }
      if (chunks.isEmpty())
        return storeContentChunk(ArrayUtils.EMPTY_BYTE_ARRAY, 0, 0);

      if (chunks.size() == 1)
        return chunks.get(0);

      List<Map<String, Object>> bases = new ArrayList<Map<String, Object>>(
          chunks.size());
      for (ContentSHA c : chunks) {
        bases.add(c.serialize());
      }
      ContentSHA result = ContentSHA.forDigestAndLength(completeSHA
          .digest(), completeLength);
      long existing = contentCollection.count(new BasicDBObject(_ID,
          result.getSHA()));
      if (existing == 0) {
        contentCollection
            .insert(new BasicDBObject(_ID, result.getSHA()).append(
                "store", "cat").append("base", bases),
                WriteConcern.SAFE);
      }
      return result;
    } catch (NoSuchAlgorithmException e) {
      throw new RuntimeException(e);
    } finally {
      IOUtils.closeQuietly(data);
    }

  }

  private ContentSHA storeContentChunk(byte[] bytes, final int offset,
      final int length) throws IOException {
    ContentSHA _sha = ContentSHA.calculate(bytes, offset, length);
    byte[] sha = _sha.getSHA();

    long existing = contentCollection.count(new BasicDBObject(_ID, sha));
    if (existing == 0) {
      byte[] gzipped = Compression.gzip(bytes, offset, length);
      if (gzipped != null && gzipped.length > chunkSize)
        gzipped = null;
      if (gzipped != null) {
        bytes = null;
        contentCollection.insert(new BasicDBObject(_ID, sha).append(
            "zin", gzipped).append("store", "gz"),
            WriteConcern.SAFE);
        gzipped = null;
      } else {
        if (offset > 0 || bytes.length != length) {
          bytes = ArrayUtils.subarray(bytes, offset, offset + length);
        }
        contentCollection.insert(new BasicDBObject(_ID, sha).append(
            "in", bytes), WriteConcern.SAFE);
      }
    }
    return _sha;
  }

  public ContentPointer storeContent(Map<String, Object> storageScheme)
      throws IOException {
    StorageScheme s = storageSchemes.get(storageScheme.get("store"));
    if (s == null)
      throw new UnsupportedOperationException(storageScheme.toString());

    DBObject x = new BasicDBObject();
    for (Map.Entry<String, Object> e : storageScheme.entrySet()) {
      x.put(e.getKey(), e.getValue());
    }
    long length = BSONUtils.getRequiredLong(x, "length");
    byte[] sha = DigestUtils.sha(s.getContent(this, storageScheme)
        .getInputStream());

    long existing = contentCollection.count(new BasicDBObject(_ID, sha));
    if (existing == 0) {
      x.put(_ID, sha);
      contentCollection.insert(x, WriteConcern.SAFE);
    }
    return new StoredContent(sha, length);
  }

  /**
   * Supported formats: 1) Serialized ContentPointers, e.g.
   *
   * <pre>
   * { in: [bytes] }
   * </pre>
   *
   * and
   *
   * <pre>
   * { sha: <sha>, length: 123 }
   * </pre>
   *
   * 2) Internal StorageScheme representations (must have {store: something}")
   */
  public Content getContent(Map<String, Object> data) throws IOException {
    if (data == null)
      return null;
    String store = MapUtils.getString(data, "store");
    if (store == null || "raw".equals(store)) {
      if (data.containsKey("in"))
        return InlineContent.deserialize(data);
      if (data.containsKey("sha")) {
        return new StoredContent((byte[]) data.get("sha"), MapUtils
            .getRequiredLong(data, "length")).loadOrLazyLoad(this,
            8 * 1024);
      }
      throw new UnsupportedOperationException(data.toString());
    }

    StorageScheme s = storageSchemes.get(store);
    if (s == null)
      throw new UnsupportedOperationException(store);

    return s.getContent(this, data);
  }
}
TOP

Related Classes of v7db.files.mongodb.MongoContentStorage

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.