// Package: org.lilyproject.repository.bulk
//
// Source code of org.lilyproject.repository.bulk.BulkIngester$BlobsNotSupportedBlobManager

/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.repository.bulk;

import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.google.common.collect.Sets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.zookeeper.KeeperException;
import org.lilyproject.client.LilyClient;
import org.lilyproject.repository.api.Blob;
import org.lilyproject.repository.api.BlobAccess;
import org.lilyproject.repository.api.BlobException;
import org.lilyproject.repository.api.BlobManager;
import org.lilyproject.repository.api.BlobReference;
import org.lilyproject.repository.api.BlobStoreAccess;
import org.lilyproject.repository.api.FieldType;
import org.lilyproject.repository.api.FieldTypes;
import org.lilyproject.repository.api.IdGenerator;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.Record;
import org.lilyproject.repository.api.RecordFactory;
import org.lilyproject.repository.api.RecordId;
import org.lilyproject.repository.api.Repository;
import org.lilyproject.repository.api.RepositoryException;
import org.lilyproject.repository.api.RepositoryManager;
import org.lilyproject.repository.api.TypeManager;
import org.lilyproject.repository.impl.HBaseRepository;
import org.lilyproject.repository.impl.HBaseRepository.FieldValueWriter;
import org.lilyproject.repository.impl.HBaseRepositoryManager;
import org.lilyproject.repository.impl.HBaseTypeManager;
import org.lilyproject.repository.impl.RecordFactoryImpl;
import org.lilyproject.repository.impl.id.IdGeneratorImpl;
import org.lilyproject.repository.model.api.RepositoryModel;
import org.lilyproject.repository.model.impl.RepositoryModelImpl;
import org.lilyproject.util.exception.ExceptionUtil;
import org.lilyproject.util.hbase.HBaseTableFactory;
import org.lilyproject.util.hbase.HBaseTableFactoryImpl;
import org.lilyproject.util.hbase.LilyHBaseSchema;
import org.lilyproject.util.hbase.RepoAndTableUtil;
import org.lilyproject.util.repo.RecordEvent;
import org.lilyproject.util.repo.RecordEvent.Type;
import org.lilyproject.util.zookeeper.ZkUtil;
import org.lilyproject.util.zookeeper.ZooKeeperItf;
import org.python.google.common.collect.Lists;

/**
* Writes Lily records in bulk to HBase. Also provides methods for creating HBase {@code Put} or
* {@code KeyValue} object for alternative methods of writing (e.g. via MapReduce).
*/
public class BulkIngester implements Closeable {

    public static final int PUT_BUFFER_SIZE = 1000;

    /**
     * Bulk mode is default. If not in bulk mode, the bulk ingester merely delegates all operations to lily client.
     */
    private boolean bulkMode = true;

    private LilyClient lilyClient;

    private HBaseRepository hbaseRepo;
    private RecordFactory recordFactory;
    private HTableInterface recordTable;
    private FieldTypes fieldTypes;
    private List<Put> putBuffer = Lists.newArrayListWithCapacity(PUT_BUFFER_SIZE);

    public boolean isBulkMode() {
        return bulkMode;
    }

    /**
     * Factory method for creation of a {@code BulkIngester} that operates on the default repository table.
     *
     * @param zkConnString Connection string for ZooKeeper
     * @param timeout      ZooKeeper session timeout
     * @return a new BulkIngester
     */
    public static BulkIngester newBulkIngester(String zkConnString, int timeout) {
        return newBulkIngester(zkConnString, timeout, RepoAndTableUtil.DEFAULT_REPOSITORY, LilyHBaseSchema.Table.RECORD.name,
                true);
    }

    /**
     * Factory method for creation of a {@code BulkIngester} that operates on a non-default repository table.
     *
     * @param zkConnString connection string for ZooKeeper
     * @param timeout      ZooKeeper session timeout
     * @param tableName    name of the repository table to write to
     */
    public static BulkIngester newBulkIngester(String zkConnString, int timeout, String repositoryName, String tableName,
                                               boolean bulkMode) {
        try {
            ZooKeeperItf zk = ZkUtil.connect(zkConnString, timeout);

            // we need a lily client for non bulk access
            LilyClient lilyClient = new LilyClient(zk);

            // we need an HBaseRepository for bulk access
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", zkConnString);
            HBaseTableFactory hbaseTableFactory = new HBaseTableFactoryImpl(conf);
            HBaseRepository hbaseRepository = createHBaseRepository(repositoryName, tableName, zk, conf, hbaseTableFactory);

            return new BulkIngester(
                    lilyClient,
                    hbaseRepository,
                    LilyHBaseSchema.getRecordTable(hbaseTableFactory, hbaseRepository.getRepositoryName(),
                            hbaseRepository.getTableName()),
                    bulkMode);

        } catch (Exception e) {
            ExceptionUtil.handleInterrupt(e);
            throw new RuntimeException(e);
        }
    }

    private static HBaseRepository createHBaseRepository(String repositoryName, String tableName, ZooKeeperItf zk,
                                                         Configuration conf, HBaseTableFactory hbaseTableFactory)
            throws KeeperException, InterruptedException, IOException, RepositoryException {
        RepositoryModel repositoryModel = new RepositoryModelImpl(zk);
        IdGenerator idGenerator = new IdGeneratorImpl();
        TypeManager typeManager = new HBaseTypeManager(idGenerator, conf, zk, hbaseTableFactory);
        RecordFactory recordFactory = new RecordFactoryImpl();

        RepositoryManager repositoryManager = new HBaseRepositoryManager(typeManager, idGenerator,
                recordFactory, hbaseTableFactory, new BlobsNotSupportedBlobManager(), conf, repositoryModel);
        HBaseRepository hbaseRepository;
        if (tableName != null) {
            hbaseRepository = (HBaseRepository) repositoryManager.getRepository(repositoryName).getTable(tableName);
        } else {
            hbaseRepository = (HBaseRepository) repositoryManager.getRepository(repositoryName);
        }
        return hbaseRepository;
    }

    BulkIngester(LilyClient lilyClient, HBaseRepository hbaseRepo, HTableInterface recordTable, boolean bulkMode)
            throws InterruptedException {
        this.lilyClient = lilyClient;
        this.hbaseRepo = hbaseRepo;
        this.recordFactory = hbaseRepo.getRecordFactory();
        this.recordTable = recordTable;
        this.fieldTypes = hbaseRepo.getTypeManager().getFieldTypesSnapshot();
        this.bulkMode = bulkMode;
    }

    /**
     * Factory method for creation of Records, with the same semantics as
     * {@link Repository#newRecord()}.
     *
     * @return A newly-created record
     */
    public Record newRecord() {
        return recordFactory.newRecord();
    }

    /**
     * Same as {@link Repository#getIdGenerator()}.
     *
     * @return The IdGenerator of the underlying repository
     */
    public IdGenerator getIdGenerator() {
        return hbaseRepo.getIdGenerator();
    }

    /**
     * When in bulk mode, write a single record directly to HBase, circumventing any indexing or other secondary actions
     * that are performed when using the standard Lily API.
     * <p>
     * <b>WARNING:</b>This method is not thread-safe.
     * <p>
     * Puts are first written to a buffer, which is flushed when it reaches {@link BulkIngester#PUT_BUFFER_SIZE}.
     *
     * When not in bulk mode, this merely delegates to createOrUpdate on the Lily HBase repository.
     *
     * @param record Record to be written
     */
    public void write(Record record) throws InterruptedException, RepositoryException, IOException {
        if (bulkMode) {
            putBuffer.add(buildPut(record));
            if (putBuffer.size() == PUT_BUFFER_SIZE) {
                flush();
            }
        } else {
            lilyClient.getRepository(hbaseRepo.getRepositoryName()).getTable(hbaseRepo.getTableName()).createOrUpdate(record);
        }
    }

    /**
     * Flush buffered Puts to the Lily record table.
     * <p>
     * This method is not thread-safe.
     */
    public void flush() throws IOException {
        if (!putBuffer.isEmpty()) {
            recordTable.put(Lists.newArrayList(putBuffer));
            putBuffer.clear();
        }
    }

    /**
     * Build the {@code Put} that represents a record for inserting into HBase.
     *
     * @param record The record to be translated into an HBase {@code Put}
     * @return Put which can be directly written to HBase
     */
    public Put buildPut(Record record) throws InterruptedException, RepositoryException {
        RecordEvent recordEvent = new RecordEvent();
        recordEvent.setType(Type.CREATE);
        recordEvent.setTableName(hbaseRepo.getTableName());
        // set empty IndexRecordFilterData to omit the warnings in the IndexEditFilter
        recordEvent.setIndexRecordFilterData(new RecordEvent.IndexRecordFilterData());
        if (record.getId() == null) {
            record.setId(getIdGenerator().newRecordId());
        }
        Put put = hbaseRepo.buildPut(record, 1L, fieldTypes, recordEvent, Sets.<BlobReference>newHashSet(),
                Sets.<BlobReference>newHashSet(), 1L);
        put.add(LilyHBaseSchema.RecordCf.DATA.bytes, LilyHBaseSchema.RecordColumn.PAYLOAD.bytes, recordEvent.toJsonBytes());
        return put;
    }

    /**
     * Build a {@code Put} to update a record. No metadata updates are performed, and any existing metadata on the
     * fields will be overwritten.
     * <p>
     * The record to be updated must exist, otherwise a "partial" record will be created. No checking is done to ensure
     * that the record to be updated exists.
     * <p>
     * Additionally, records updated in this manner must be unversioned records.
     * <p>
     * In other words, use this method at your own risk. Unless you are very certain about the context you are working
     * in, updates should go via the Lily API.
     *
     * @param recordId    identifier of the record to be updated
     * @param fieldValues map of field names and values to be updated on the record
     * @return Put containing all field updates
     */
    public Put buildRecordUpdate(RecordId recordId, Map<QName, Object> fieldValues) {
        Put put = new Put(recordId.toBytes());
        FieldValueWriter fieldValueWriter = hbaseRepo.newFieldValueWriter(put, null);

        for (Entry<QName, Object> fieldEntry : fieldValues.entrySet()) {
            try {
                fieldValueWriter.addFieldValue(fieldTypes.getFieldType(fieldEntry.getKey()), fieldEntry.getValue(), null);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        return put;
    }

    @Override
    public void close() throws IOException {
        flush();
        lilyClient.close();
    }


    /**
     * Returns the underlying repository manager. This allows performing any other kind of
     * repository-based task within the context of a bulk import job.
     *
     * @return the underlying RepositoryManager
     */
    public RepositoryManager getRepositoryManager() {
        return lilyClient;
    }


    /**
     * BlobManager that ensures that blobs aren't supported in bulk import.
     * <p>
     * In the future, it may be interesting to support blobs, but at the moment it seems as this
     * would likely lead to poor import performance.
     */
    private static class BlobsNotSupportedBlobManager implements BlobManager {

        private static final String NOT_SUPPORTED_MESSAGE = "Blobs are not supported for bulk imports";

        @Override
        public void incubateBlob(byte[] blobKey) throws IOException {
            throw new UnsupportedOperationException(NOT_SUPPORTED_MESSAGE);
        }

        @Override
        public Set<BlobReference> reserveBlobs(Set<BlobReference> blobs) throws IOException {
            throw new UnsupportedOperationException(NOT_SUPPORTED_MESSAGE);
        }

        @Override
        public void handleBlobReferences(RecordId recordId, Set<BlobReference> referencedBlobs,
                                         Set<BlobReference> unReferencedBlobs) {
            // no op
        }

        @Override
        public OutputStream getOutputStream(Blob blob) throws BlobException {
            throw new UnsupportedOperationException(NOT_SUPPORTED_MESSAGE);
        }

        @Override
        public BlobAccess getBlobAccess(Record record, QName fieldName, FieldType fieldType, int... indexes)
                throws BlobException {
            throw new UnsupportedOperationException(NOT_SUPPORTED_MESSAGE);
        }

        @Override
        public void register(BlobStoreAccess blobStoreAccess) {
            throw new UnsupportedOperationException(NOT_SUPPORTED_MESSAGE);
        }

        @Override
        public void delete(byte[] blobKey) throws BlobException {
            throw new UnsupportedOperationException(NOT_SUPPORTED_MESSAGE);
        }


    }

}
// --- Site navigation/footer text from the hosting page (massapi.com), preserved as comments ---
// TOP
//
// Related Classes of org.lilyproject.repository.bulk.BulkIngester$BlobsNotSupportedBlobManager
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun
// Microsystems, Inc., owned by Oracle Inc. Contact: coftware#gmail.com.