Package com.asakusafw.directio.hive.parquet

Source Code of com.asakusafw.directio.hive.parquet.DataModelWriteSupport

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.directio.hive.parquet;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;

import parquet.hadoop.api.WriteSupport;
import parquet.io.api.RecordConsumer;
import parquet.schema.MessageType;
import parquet.schema.Type;

import com.asakusafw.directio.hive.serde.DataModelDescriptor;
import com.asakusafw.directio.hive.serde.PropertyDescriptor;
import com.asakusafw.directio.hive.serde.PropertyExtractor;
import com.asakusafw.runtime.value.ValueOption;

/**
* Consumes a data model into {@link RecordConsumer}.
* @since 0.7.0
*/
public class DataModelWriteSupport extends WriteSupport<Object> {

    private final MessageType schema;

    private final Map<String, String> metadata;

    private final PropertyDescriptor[] properties;

    private final String[] names;

    private final ValueWriter[] drivers;

    private RecordConsumer recordConsumer;

    /**
     * Creates a new instance with empty extra-metadata.
     * @param descriptor the target data model descriptor
     */
    public DataModelWriteSupport(DataModelDescriptor descriptor) {
        this(descriptor, Collections.<String, String>emptyMap());
    }

    /**
     * Creates a new instance.
     * @param descriptor the target data model descriptor
     * @param metadata the extra metadata
     */
    public DataModelWriteSupport(DataModelDescriptor descriptor, Map<String, String> metadata) {
        if (descriptor.getPropertyDescriptors().isEmpty()) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Parquet file must contain >= 1 properties: {0}",
                    descriptor.getDataModelClass().getName()));
        }
        this.schema = computeSchema(descriptor);
        this.metadata = metadata == null ? Collections.<String, String>emptyMap() : metadata;
        List<? extends PropertyDescriptor> props = descriptor.getPropertyDescriptors();
        this.properties = props.toArray(new PropertyDescriptor[props.size()]);
        this.names = new String[props.size()];
        this.drivers = new ValueWriter[props.size()];
        for (int i = 0, n = props.size(); i < n; i++) {
            PropertyDescriptor property = props.get(i);
            names[i] = property.getFieldName();
            drivers[i] = ParquetValueDrivers.of(property.getTypeInfo(), property.getValueClass()).getWriter();
        }
    }

    private MessageType computeSchema(DataModelDescriptor descriptor) {
        List<Type> fields = new ArrayList<Type>();
        for (PropertyDescriptor property : descriptor.getPropertyDescriptors()) {
            Type field = computeParquetType(property);
            fields.add(field);
        }
        return new MessageType(
                descriptor.getDataModelClass().getName(),
                fields);
    }

    private Type computeParquetType(PropertyDescriptor property) {
        ParquetValueDriver driver = ParquetValueDrivers.of(
                property.getTypeInfo(),
                property.getValueClass());
        return driver.getType(property.getFieldName());
    }

    @Override
    public WriteContext init(Configuration configuration) {
        return new WriteContext(schema, metadata);
    }

    @Override
    public void prepareForWrite(RecordConsumer consumer) {
        this.recordConsumer = consumer;
    }

    @Override
    public void write(Object value) {
        RecordConsumer consumer = recordConsumer;
        String[] ns = names;
        PropertyExtractor[] ps = properties;
        ValueWriter[] vs = drivers;
        consumer.startMessage();
        for (int index = 0, n = ns.length; index < n; index++) {
            ValueOption<?> property = ps[index].extract(value);
            if (property.isNull() == false) {
                String name = ns[index];
                consumer.startField(name, index);
                vs[index].write(property, consumer);
                consumer.endField(name, index);
            }
        }
        consumer.endMessage();
    }
}
TOP

Related Classes of com.asakusafw.directio.hive.parquet.DataModelWriteSupport

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.