/*
* Copyright 2013 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ngdata.hbaseindexer.parse;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableSet;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Lists;
import com.ngdata.hbaseindexer.ConfigureUtil;
import com.ngdata.hbaseindexer.conf.DocumentExtractDefinition;
import com.ngdata.hbaseindexer.conf.FieldDefinition;
import com.ngdata.hbaseindexer.parse.extract.ByteArrayExtractors;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Timer;
import com.yammer.metrics.core.TimerContext;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.solr.common.SolrInputDocument;
import static com.ngdata.hbaseindexer.metrics.IndexerMetricsUtil.metricName;
import static com.ngdata.sep.impl.HBaseShims.newGet;
/**
* Parses HBase {@code Result} objects into a structure of fields and values.
*/
public class DefaultResultToSolrMapper implements ResultToSolrMapper {
/**
* Map of Solr field names to transformers for extracting data from HBase {@code Result} objects.
*/
private List<SolrDocumentExtractor> resultDocumentExtractors;
/**
* Information to be used for constructing a Get to fetch data required for indexing.
*/
private Map<byte[], NavigableSet<byte[]>> familyMap;
/**
* Used to do evaluation on applicability of KeyValues.
*/
private List<ByteArrayExtractor> extractors;
private Timer mappingTimer;
/**
* Instantiate with {@code FieldDefinitions}s and {@code DocumentExtractDefinition}s.
*
* @param fieldDefinitions define fields to be indexed
* @param documentExtractDefinitions additional document extraction definitions
*/
public DefaultResultToSolrMapper(String indexerName, List<FieldDefinition> fieldDefinitions,
List<DocumentExtractDefinition> documentExtractDefinitions) {
extractors = Lists.newArrayList();
resultDocumentExtractors = Lists.newArrayList();
for (FieldDefinition fieldDefinition : fieldDefinitions) {
ByteArrayExtractor byteArrayExtractor = ByteArrayExtractors.getExtractor(
fieldDefinition.getValueExpression(), fieldDefinition.getValueSource());
ByteArrayValueMapper valueMapper = ByteArrayValueMappers.getMapper(fieldDefinition.getTypeName());
ConfigureUtil.configure(valueMapper, fieldDefinition.getParams());
resultDocumentExtractors.add(new HBaseSolrDocumentExtractor(fieldDefinition.getName(), byteArrayExtractor,
valueMapper));
extractors.add(byteArrayExtractor);
}
for (DocumentExtractDefinition extractDefinition : documentExtractDefinitions) {
ByteArrayExtractor byteArrayExtractor = ByteArrayExtractors.getExtractor(
extractDefinition.getValueExpression(), extractDefinition.getValueSource());
extractors.add(byteArrayExtractor);
}
Get get = newGet();
for (ByteArrayExtractor extractor : extractors) {
byte[] columnFamily = extractor.getColumnFamily();
byte[] columnQualifier = extractor.getColumnQualifier();
if (columnFamily != null) {
if (columnQualifier != null) {
get.addColumn(columnFamily, columnQualifier);
} else {
get.addFamily(columnFamily);
}
}
}
familyMap = get.getFamilyMap();
mappingTimer = Metrics.newTimer(metricName(getClass(), "HBase Result to Solr mapping time", indexerName),
TimeUnit.MILLISECONDS, TimeUnit.SECONDS);
}
@Override
public boolean containsRequiredData(Result result) {
for (ByteArrayExtractor extractor : extractors) {
if (!extractor.containsTarget(result)) {
return false;
}
}
return true;
}
@Override
public boolean isRelevantKV(KeyValue kv) {
for (ByteArrayExtractor extractor : extractors) {
if (extractor.isApplicable(kv)) {
return true;
}
}
return false;
}
@Override
public Get getGet(byte[] row) {
Get get = new Get(row);
for (Entry<byte[], NavigableSet<byte[]>> familyMapEntry : familyMap.entrySet()) {
byte[] columnFamily = familyMapEntry.getKey();
if (familyMapEntry.getValue() == null) {
get.addFamily(columnFamily);
} else {
for (byte[] qualifier : familyMapEntry.getValue()) {
get.addColumn(columnFamily, qualifier);
}
}
}
return get;
}
@Override
public void map(Result result, SolrUpdateWriter solrUpdateWriter) {
TimerContext timerContext = mappingTimer.time();
try {
SolrInputDocument solrInputDocument = new SolrInputDocument();
for (SolrDocumentExtractor documentExtractor : resultDocumentExtractors) {
documentExtractor.extractDocument(result, solrInputDocument);
}
solrUpdateWriter.add(solrInputDocument);
} finally {
timerContext.stop();
}
}
}