Package org.apache.tez.processor

Source Code of org.apache.tez.processor.FilterByWordInputProcessor

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.tez.processor;

import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.tez.common.TezUtils;
import org.apache.tez.mapreduce.examples.FilterLinesByWord;
import org.apache.tez.mapreduce.examples.FilterLinesByWord.TextLongPair;
import org.apache.tez.mapreduce.hadoop.MRJobConfig;
import org.apache.tez.mapreduce.input.MRInput;
import org.apache.tez.mapreduce.input.MRInputLegacy;
import org.apache.tez.runtime.api.Event;
import org.apache.tez.runtime.api.LogicalIOProcessor;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.TezProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.output.OnFileUnorderedKVOutput;

public class FilterByWordInputProcessor implements LogicalIOProcessor {

  private static final Log LOG = LogFactory.getLog(FilterByWordInputProcessor.class);

  private String filterWord;

  public FilterByWordInputProcessor() {
  }

  @Override
  public void initialize(TezProcessorContext processorContext) throws Exception {
    Configuration conf = TezUtils.createConfFromUserPayload(processorContext.getUserPayload());
    filterWord = conf.get(FilterLinesByWord.FILTER_PARAM_NAME);
    if (filterWord == null) {
      processorContext.fatalError(null, "No filter word specified");
    }
  }

  @Override
  public void handleEvents(List<Event> processorEvents) {
    throw new UnsupportedOperationException("Not expecting any events to the broadcast processor");

  }

  @Override
  public void close() throws Exception {
    LOG.info("Broadcast Processor closing. Nothing to do");
  }

  @Override
  public void run(Map<String, LogicalInput> inputs,
      Map<String, LogicalOutput> outputs) throws Exception {

    if (inputs.size() != 1) {
      throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
    }

    if (outputs.size() != 1) {
      throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
    }

    LogicalInput li = inputs.values().iterator().next();
    if (! (li instanceof MRInput)) {
      throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
    }

    LogicalOutput lo = outputs.values().iterator().next();
    if (! (lo instanceof OnFileUnorderedKVOutput)) {
      throw new IllegalStateException("FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
    }

   
   
   
    MRInputLegacy mrInput = (MRInputLegacy) li;
    mrInput.init();
    OnFileUnorderedKVOutput kvOutput = (OnFileUnorderedKVOutput) lo;

    Configuration updatedConf = mrInput.getConfigUpdates();
    Text srcFile = new Text();
    srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
    if (updatedConf != null) {
      String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
      if (fileName != null) {
        LOG.info("Processing file: " + fileName);
        srcFile.set(fileName);
      }
    }

    KeyValueReader kvReader = mrInput.getReader();
    KeyValueWriter kvWriter = kvOutput.getWriter();

    while (kvReader.next()) {
      Object key = kvReader.getCurrentKey();
      Object val = kvReader.getCurrentValue();

      Text valText = (Text) val;
      String readVal = valText.toString();
      if (readVal.contains(filterWord)) {
        LongWritable lineNum = (LongWritable) key;
        TextLongPair outVal = new TextLongPair(srcFile, lineNum);
        kvWriter.write(valText, outVal);
      }
    }
  }
}
TOP

Related Classes of org.apache.tez.processor.FilterByWordInputProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.