Package com.cloudera.cdk.morphline.stdio

Source Code of com.cloudera.cdk.morphline.stdio.ReadClobBuilder$ReadClob

/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.morphline.stdio;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.Collections;

import com.cloudera.cdk.morphline.api.Command;
import com.cloudera.cdk.morphline.api.CommandBuilder;
import com.cloudera.cdk.morphline.api.MorphlineContext;
import com.cloudera.cdk.morphline.api.Record;
import com.cloudera.cdk.morphline.base.Fields;
import com.typesafe.config.Config;

/**
* Command that emits one record for the entire input stream of the first attachment, interpreting
* the stream as a Character Large Object (CLOB).
*/
public final class ReadClobBuilder implements CommandBuilder {

  @Override
  public Collection<String> getNames() {
    return Collections.singletonList("readClob");
  }

  @Override
  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
    return new ReadClob(this, config, parent, child, context);
  }
 
 
  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  private static final class ReadClob extends AbstractParser {

    private final Charset charset;
    private final char[] buffer = new char[8192];
    private StringBuilder clob;
    private int counter = 0;
 
    public ReadClob(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
      super(builder, config, parent, child, context);
      this.charset = getConfigs().getCharset(config, "charset", null);
      validateArguments();
    }
 
    @Override
    protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException {
      if (counter++ % 8192 == 0) {
        clob = new StringBuilder(); // periodically gc memory from large outlier strings
      }
      incrementNumRecords();
      Charset detectedCharset = detectCharset(inputRecord, charset)
      Reader reader = new InputStreamReader(stream, detectedCharset);
      clob.setLength(0);
      int len;
      while ((len = reader.read(buffer)) >= 0) {
        clob.append(buffer, 0, len);
      }
      Record outputRecord = inputRecord.copy();
      removeAttachments(outputRecord);
      outputRecord.replaceValues(Fields.MESSAGE, clob.toString());
       
      // pass record to next command in chain:
      return getChild().process(outputRecord);
    }
     
  }
}
TOP

Related Classes of com.cloudera.cdk.morphline.stdio.ReadClobBuilder$ReadClob

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.