Package org.lilyproject.mapreduce

Source Code of org.lilyproject.mapreduce.LilyMapReduceUtil

/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.mapreduce;

import java.util.List;

import com.google.common.base.Joiner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.codehaus.jackson.JsonNode;
import org.lilyproject.client.LilyClient;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.RecordScan;
import org.lilyproject.tools.import_.json.RecordScanWriter;
import org.lilyproject.tools.import_.json.WriteOptions;
import org.lilyproject.util.exception.ExceptionUtil;
import org.lilyproject.util.json.JsonFormat;

public class LilyMapReduceUtil {
    public static final String ZK_CONNECT_STRING = "lily.mapreduce.zookeeper";

    /**
     * Config key for storing the list of repository tables to be run through the mapper.
     */
    public static final String REPOSITORY_TABLES = "lily.mapreduce.tables";

    /**
     * Config key for storing the repository on which the MR job should be executed.
     */
    public static final String REPOSITORY_NAME = "lily.mapreduce.repository";

    private LilyMapReduceUtil() {
    }

    /**
     * Set the necessary parameters inside the job configuration for using Lily as input.
     */
    public static void initMapperJob(RecordScan scan, String zooKeeperConnectString, LRepository repository, Job job) {
        initMapperJob(scan, false, zooKeeperConnectString, repository, job, null);
    }

    /**
     * Initialize a mapper job to run on a specific set of repository tables.
     */
    public static void initMapperJob(RecordScan scan, String zooKeeperConnectString, LRepository repository,
                                        Job job, List<String> repositoryTables) {
        initMapperJob(scan, false, zooKeeperConnectString, repository, job, repositoryTables);

    }

    public static void initMapperJob(RecordScan scan, boolean returnIdRecords, String zooKeeperConnectString,
            LRepository repository, Job job) {
        initMapperJob(scan, returnIdRecords, zooKeeperConnectString, repository, job, null);
    }

    /**
     * Set the necessary parameters inside the job configuration for using Lily as input.
     */
    public static void initMapperJob(RecordScan scan, boolean returnIdRecords, String zooKeeperConnectString,
                        LRepository repository, Job job, List<String> repositoryTables) {
        if (returnIdRecords) {
            job.setInputFormatClass(LilyIdScanInputFormat.class);
        } else {
            job.setInputFormatClass(LilyScanInputFormat.class);
        }

        job.getConfiguration().set(ZK_CONNECT_STRING, zooKeeperConnectString);

        job.getConfiguration().set(REPOSITORY_NAME, repository.getRepositoryName());

        if (repositoryTables != null && !repositoryTables.isEmpty()) {
            job.getConfiguration().set(REPOSITORY_TABLES, Joiner.on(',').join(repositoryTables));
        }

        if (scan != null) {
            try {
                JsonNode node = RecordScanWriter.INSTANCE.toJson(scan, new WriteOptions(), repository);
                String scanData = JsonFormat.serializeAsString(node);
                job.getConfiguration().set(AbstractLilyScanInputFormat.SCAN, scanData);
            } catch (Exception e) {
                ExceptionUtil.handleInterrupt(e);
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Creates a LilyClient based on the information found in the Configuration object.
     */
    public static LilyClient getLilyClient(Configuration conf) throws InterruptedException {
        String zkConnectString = conf.get(ZK_CONNECT_STRING);
        try {
            return new LilyClient(zkConnectString, 30000);
        } catch (InterruptedException e) {
            throw e;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
TOP

Related Classes of org.lilyproject.mapreduce.LilyMapReduceUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.