Package com.ngdata.hbaseindexer.mr

Source Code of com.ngdata.hbaseindexer.mr.IndexerDryRun

/*
* Copyright 2013 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ngdata.hbaseindexer.mr;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.List;
import java.util.Map;

import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.ngdata.hbaseindexer.conf.IndexerComponentFactory;
import com.ngdata.hbaseindexer.conf.IndexerComponentFactoryUtil;
import com.ngdata.hbaseindexer.conf.IndexerConf;
import com.ngdata.hbaseindexer.conf.IndexerConf.RowReadMode;
import com.ngdata.hbaseindexer.conf.IndexerConfBuilder;
import com.ngdata.hbaseindexer.indexer.Indexer;
import com.ngdata.hbaseindexer.indexer.ResultToSolrMapperFactory;
import com.ngdata.hbaseindexer.indexer.ResultWrappingRowData;
import com.ngdata.hbaseindexer.indexer.RowData;
import com.ngdata.hbaseindexer.indexer.SolrInputDocumentWriter;
import com.ngdata.hbaseindexer.morphline.MorphlineResultToSolrMapper;
import com.ngdata.hbaseindexer.parse.ResultToSolrMapper;
import com.ngdata.sep.util.io.Closer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.hadoop.ForkedMapReduceIndexerTool;
import org.apache.solr.hadoop.MorphlineClasspathUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Runs an indexer over HBase data, printing the output to a PrintStream.
* <p>
* The purpose of this class is to allow for quickly debugging indexing
* before actually writing it to
*/
class IndexerDryRun {
   
    private static final Logger LOG = LoggerFactory.getLogger(IndexerDryRun.class);

    private HBaseIndexingOptions indexingOpts;
    private Configuration hbaseConf;
    private SolrInputDocumentWriter documentWriter;

    /**
     * @param hbaseIndexingOpts definition of the indexing job to be run
     * @param hbaseConf contains information for connecting to HBase
     * @param output output stream to print index documents
     */
    public IndexerDryRun(HBaseIndexingOptions hbaseIndexingOpts, Configuration hbaseConf, OutputStream output) {
        this.indexingOpts = hbaseIndexingOpts;
        this.hbaseConf = hbaseConf;
        this.documentWriter = new DryRunSolrInputDocumentWriter(output);
    }
   
    /**
     * Run a dry run over the configured input, printing SolrInputDocuments to
     * the configured output.
     *
     * @return 0 if successful, non-zero otherwise
     */
    int run() {
     
        long programStartTime = System.currentTimeMillis();
        IndexingSpecification indexingSpec = indexingOpts.getIndexingSpecification();

        IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(indexingSpec.getIndexerComponentFactory(), new ByteArrayInputStream(indexingSpec.getConfiguration()), Maps.<String, String>newHashMap());
        IndexerConf indexerConf = factory.createIndexerConf();

        if (indexerConf.getRowReadMode() != RowReadMode.NEVER) {
            LOG.warn("Changing row read mode from " + indexerConf.getRowReadMode() + " to " + RowReadMode.NEVER);
            indexerConf = new IndexerConfBuilder(indexerConf).rowReadMode(RowReadMode.NEVER).build();
        }

        Map<String, String> params = indexerConf.getGlobalParams();
        if (indexingOpts.morphlineFile != null) {
            params.put(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, indexingOpts.morphlineFile.getPath());
        }
        if (indexingOpts.morphlineId != null) {
            params.put(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, indexingOpts.morphlineId);
        }

        for (Map.Entry<String, String> entry : hbaseConf) {
            if (entry.getKey().startsWith(MorphlineResultToSolrMapper.MORPHLINE_VARIABLE_PARAM + ".")) {
                params.put(entry.getKey(), entry.getValue());
            }
            if (entry.getKey().startsWith(MorphlineResultToSolrMapper.MORPHLINE_FIELD_PARAM + ".")) {
                params.put(entry.getKey(), entry.getValue());
            }
        }

        MorphlineClasspathUtil.setupJavaCompilerClasspath();
       
        ResultToSolrMapper resultToSolrMapper = ResultToSolrMapperFactory.createResultToSolrMapper(
                indexingSpec.getIndexerName(),
                indexerConf);
       
        Indexer indexer = Indexer.createIndexer(
                                indexingSpec.getIndexerName(),
                                indexerConf,
                                indexingSpec.getTableName(),
                                resultToSolrMapper,
                                null,
                                null,
                                documentWriter);
       
        Scan scan = indexingOpts.getScans().get(0);
       
        HTable htable = null;
        try {
            htable = new HTable(hbaseConf, indexingSpec.getTableName());
            ResultScanner scanner = htable.getScanner(scan);
            for (Result result : scanner) {
                indexer.indexRowData(ImmutableList.<RowData>of(new ResultWrappingRowData(result,
                        indexingSpec.getTableName().getBytes(Charsets.UTF_8))));
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            Closer.close(htable);
        }
       
        ForkedMapReduceIndexerTool.goodbye(null, programStartTime);       
        return 0;
       
    }
   
   
    static class DryRunSolrInputDocumentWriter implements SolrInputDocumentWriter {
       
        private PrintWriter printWriter;

        public DryRunSolrInputDocumentWriter(OutputStream outputStream) {
            this.printWriter = new PrintWriter(outputStream, true);
        }

        @Override
        public void add(int shard, Map<String, SolrInputDocument> inputDocumentMap) throws SolrServerException, IOException {
            for (SolrInputDocument doc : inputDocumentMap.values()) {
                printWriter.println("dryRun: " + doc);
            }
        }

        @Override
        public void deleteById(int shard, List<String> idsToDelete) throws SolrServerException, IOException {
            throw new UnsupportedOperationException("Deletes are not supported in batch mode");
        }

        @Override
        public void deleteByQuery(String deleteQuery) throws SolrServerException, IOException {
            throw new UnsupportedOperationException("Deletes are not supported in batch mode");
        }

        @Override
        public void close() throws SolrServerException, IOException {
        }
       
    }

}
TOP

Related Classes of com.ngdata.hbaseindexer.mr.IndexerDryRun

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.