Package org.apache.jena.tdbloader4

Source Code of org.apache.jena.tdbloader4.FourthReducer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.jena.tdbloader4;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.GZIPOutputStream;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.jena.tdbloader4.io.LongQuadWritable;
import org.openjena.atlas.event.Event;
import org.openjena.atlas.event.EventManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FourthReducer extends Reducer<LongQuadWritable, NullWritable, NullWritable, NullWritable> {

    private static final Logger log = LoggerFactory.getLogger(FourthReducer.class);

  private Map<String, OutputStream> outputs;
    private FileSystem fs;
    private Path outLocal;
    private Path outRemote;
    private TaskAttemptID taskAttemptID;
    private Counters counters;

  @Override
  public void setup(Context context) {
    this.taskAttemptID = context.getTaskAttemptID();
    outputs = new HashMap<String, OutputStream>();
    String outputRootDirectory = context.getConfiguration()
                        .get(Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR,
                           Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR_DEFAULT);
    try {
      fs = FileSystem.get(context.getConfiguration());
          outRemote = FileOutputFormat.getWorkOutputPath(context);
            outLocal = new Path(outputRootDirectory, context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
          new File(outLocal.toString()).mkdir();
          // TODO: does this make sense?
          fs.setReplication(outLocal, (short)2);
          fs.startLocalOutput(outRemote, outLocal);
    } catch (Exception e) {
        throw new TDBLoader4Exception(e);
    }
    counters = new Counters(context);
  }

  @Override
  public void reduce(LongQuadWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        log.debug("< ({}, {})", key, values.iterator().next());

    String filename = key.getIndexName();
    OutputStream out = getOutputStream(filename);
    if ( out != null ) {
      out.write(Utils.toHex(key.get(0)));
      out.write(' ');
      out.write(Utils.toHex(key.get(1)));
      out.write(' ');
      out.write(Utils.toHex(key.get(2)));
      if ( key.get(3) != -1l ) {
        out.write(' ');
        out.write(Utils.toHex(key.get(3)));       
      }
      out.write('\n');
    }
    context.progress();
    EventManager.send(counters, new Event(Constants.eventRecord, null));
        log.debug("> {}:{}", filename, key);
  }
 
  private OutputStream getOutputStream(String filename) throws IOException {
    OutputStream output = null;
    if ( !outputs.containsKey(filename) ) {
      output = new GZIPOutputStream(new FileOutputStream(outLocal.toString() + "/" + filename + "_" + taskAttemptID + ".gz"));
      outputs.put(filename, output);
    }
    return outputs.get(filename);
  }

  @Override
  public void cleanup(Context context) throws IOException {
    for ( String filename : outputs.keySet() ) {
      outputs.get(filename).close();
    }
      fs.completeLocalOutput(outRemote, outLocal);
      counters.close();
  }

}
TOP

Related Classes of org.apache.jena.tdbloader4.FourthReducer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle. Contact coftware#gmail.com.