Package org.teiid.query.processor.relational

Source Code of org.teiid.query.processor.relational.TextTableNode

/*
* JBoss, Home of Professional Open Source.
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership.  Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA.
*/

package org.teiid.query.processor.relational;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.teiid.api.exception.query.ExpressionEvaluationException;
import org.teiid.common.buffer.BlockedException;
import org.teiid.common.buffer.BufferManager;
import org.teiid.common.buffer.TupleBatch;
import org.teiid.core.TeiidComponentException;
import org.teiid.core.TeiidProcessingException;
import org.teiid.core.types.ClobImpl;
import org.teiid.core.types.ClobType;
import org.teiid.core.types.DataTypeManager;
import org.teiid.core.types.TransformationException;
import org.teiid.query.QueryPlugin;
import org.teiid.query.processor.ProcessorDataManager;
import org.teiid.query.sql.lang.TextTable;
import org.teiid.query.sql.lang.TextTable.TextColumn;
import org.teiid.query.util.CommandContext;

/**
* Handles text file processing.
*
* TODO: unix style escape handling \t \n, etc. - see also the unescape function
* TODO: allow for escaping with fixed parsing
* TODO: allow for fixed parsing without new lines
* TODO: allow for a configurable line terminator
*/
public class TextTableNode extends SubqueryAwareRelationalNode {

  private TextTable table;
 
  //initialized state
  private int skip = 0;
  private int header = -1;
  private boolean noQuote;
  private char quote;
  private char delimiter;
  private int lineWidth;
    private int[] projectionIndexes;
 
    //per file state
  private BufferedReader reader;
  private int textLine = 0;
  private Map<String, Integer> nameIndexes;
  private String systemId;

  private boolean cr;
  private boolean eof;
 
  public TextTableNode(int nodeID) {
    super(nodeID);
  }
 
  @Override
  public void initialize(CommandContext context, BufferManager bufferManager,
      ProcessorDataManager dataMgr) {
    super.initialize(context, bufferManager, dataMgr);
    if (projectionIndexes != null) {
      return;
    }
    if (table.getSkip() != null) {
      skip = table.getSkip();
    }
    if (table.getHeader() != null) {
      skip = Math.max(table.getHeader(), skip);
      header = table.getHeader() - 1;
    }
    if (table.isFixedWidth()) {
      for (TextColumn col : table.getColumns()) {
        lineWidth += col.getWidth();
      }
    } else {
      if (table.getDelimiter() == null) {
        delimiter = ',';
      } else {
        delimiter = table.getDelimiter();
      }
      if (table.getQuote() == null) {
        quote = '"';
      } else {
        noQuote = table.isEscape();
        quote = table.getQuote();
      }
      lineWidth = table.getColumns().size() * DataTypeManager.MAX_STRING_LENGTH;
    }
        Map elementMap = createLookupMap(table.getProjectedSymbols());
        this.projectionIndexes = getProjectionIndexes(elementMap, getElements());
  }
 
  @Override
  public void closeDirect() {
    super.closeDirect();
    reset();
  }
 
  @Override
  public void reset() {
    super.reset();
    if (this.reader != null) {
      try {
        this.reader.close();
      } catch (IOException e) {
      }
      this.reader = null;
    }
    this.nameIndexes = null;
    this.textLine = 0;
    this.cr = false;
    this.eof = false;
  }
 
  public void setTable(TextTable table) {
    this.table = table;
  }

  @Override
  public TextTableNode clone() {
    TextTableNode clone = new TextTableNode(getID());
    this.copy(this, clone);
    clone.setTable(table);
    return clone;
  }

  @Override
  protected TupleBatch nextBatchDirect() throws BlockedException,
      TeiidComponentException, TeiidProcessingException {
   
    if (reader == null) {
      initReader();
    }

    if (reader == null) {
      terminateBatches();
      return pullBatch();
    }
   
    while (!isBatchFull()) {
      String line = readLine(lineWidth, table.isFixedWidth());
     
      if (line == null) {
        terminateBatches();
        break;
      }
     
      List<String> vals = parseLine(line);
     
      List<Object> tuple = new ArrayList<Object>(projectionIndexes.length);
      for (int output : projectionIndexes) {
        TextColumn col = table.getColumns().get(output);
        String val = null;
        int index = output;
        if (nameIndexes != null) {
          index = nameIndexes.get(col.getName());
        }
        if (index >= vals.size()) {
          throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.no_value", col.getName(), textLine, systemId)); //$NON-NLS-1$
        }
        val = vals.get(index);
        try {
          tuple.add(DataTypeManager.transformValue(val, table.getColumns().get(output).getSymbol().getType()));
        } catch (TransformationException e) {
          throw new TeiidProcessingException(e, QueryPlugin.Util.getString("TextTableNode.conversion_error", col.getName(), textLine, systemId)); //$NON-NLS-1$
        }
      }
      addBatchRow(tuple);
    }
   
    return pullBatch();
  }

  private String readLine(int maxLength, boolean exact) throws TeiidProcessingException {
    if (eof) {
      return null;
    }
    StringBuilder sb = new StringBuilder(exact ? maxLength : (maxLength >> 4));
    while (true) {
      char c = readChar();
      if (c == '\n') {
        if (sb.length() == 0) {
          if (eof) {
            return null;
          }
          continue; //skip empty lines
        }
          if (exact && sb.length() < lineWidth) {
            throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.invalid_width", sb.length(), lineWidth, textLine, systemId)); //$NON-NLS-1$
          }
        return sb.toString();
        }
        sb.append(c);
        if (sb.length() > maxLength) {
          if (exact) {
            sb.deleteCharAt(sb.length() - 1);
            //we're not forcing them to fully specify the line, so just drop the rest
            //TODO: there should be a max read length
            while (readChar() != '\n') {
             
            }
            return sb.toString();
          }
          throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.line_too_long", textLine+1, systemId, maxLength)); //$NON-NLS-1$ 
        }
    }
  }
 
  private char readChar() throws TeiidProcessingException {
    try {
      int c = reader.read();
        if (cr) {
        if (c == '\n') {
            c = reader.read();
        }
        cr = false;
        }
        switch (c) {
        case '\r':
        cr = true;
        textLine++;
        return '\n';
        case -1:
          eof = true;
          textLine++;
        return '\n';
        case '\n':   
        textLine++;
        return '\n';
        }
        return (char)c;
    } catch (IOException e) {
      throw new TeiidProcessingException(e);
    }
  }

  private void initReader() throws ExpressionEvaluationException,
      BlockedException, TeiidComponentException, TeiidProcessingException {
   
    setReferenceValues(this.table);
    ClobType file = (ClobType)getEvaluator(Collections.emptyMap()).evaluate(table.getFile(), null);
    if (file == null) {
      return;
    }
   
    //get the reader
    try {
      this.systemId = "Unknown"; //$NON-NLS-1$
      if (file.getReference() instanceof ClobImpl) {
        this.systemId = ((ClobImpl)file.getReference()).getStreamFactory().getSystemId();
        if (this.systemId == null) {
          this.systemId = "Unknown"; //$NON-NLS-1$
        }
      }
      Reader r = file.getCharacterStream();
      if (!(r instanceof BufferedReader)) {
        reader = new BufferedReader(r);
      } else {
        reader = (BufferedReader)r;
      }
    } catch (SQLException e) {
      throw new TeiidProcessingException(e);
    }
   
    //process the skip field
    if (skip <= 0) {
      return;
    }
    while (textLine < skip) {
      boolean isHeader = textLine == header;
      if (isHeader) {
        String line = readLine(DataTypeManager.MAX_STRING_LENGTH * 16, false);
        if (line == null) { //just return an empty batch
          reset();
          return;
        }
        processHeader(parseLine(line));
      } else {
        while (readChar() != '\n') {
           
          }
      }
    }
  }

  private void processHeader(List<String> line) throws TeiidProcessingException {
    nameIndexes = new HashMap<String, Integer>();
    this.lineWidth = DataTypeManager.MAX_STRING_LENGTH * line.size();
    for (String string : line) {
      if (string == null) {
        continue;
      }
      nameIndexes.put(string.toUpperCase(), nameIndexes.size());
    }
    for (TextColumn col : table.getColumns()) {
      Integer index = nameIndexes.get(col.getName().toUpperCase());
      if (index == null) {
        throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.header_missing", col.getName(), systemId)); //$NON-NLS-1$
      }
      nameIndexes.put(col.getName(), index);
    }
  }

  private List<String> parseLine(String line) throws TeiidProcessingException {
    if (table.isFixedWidth()) {
      return parseFixedWidth(line);
    }
    return parseDelimitedLine(line);
  }

  private List<String> parseDelimitedLine(String line) throws TeiidProcessingException {
    ArrayList<String> result = new ArrayList<String>();
    StringBuilder builder = new StringBuilder();
    boolean escaped = false;
    boolean wasQualified = false;
    boolean qualified = false;
    while (true) {
      if (line == null) {
        if (escaped) {
          //allow for escaped new lines
          if (cr) {
            builder.append('\r');
          }
          builder.append('\n');
          escaped = false;
          line = readLine(lineWidth, false);
          continue;
        }
        if (!qualified) {
          //close the last entry
          addValue(result, wasQualified, builder.toString());
          return result;
        }
        line = readLine(lineWidth, false);
        if (line == null) {
          throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.unclosed", systemId)); //$NON-NLS-1$
        }
      }
      char[] chars = line.toCharArray();
      for (int i = 0; i < chars.length; i++) {
        char chr = chars[i];
        if (chr == delimiter) {
          if (escaped || qualified) {
            builder.append(chr);
            escaped = false;
          } else {
            addValue(result, wasQualified, builder.toString());
            wasQualified = false;
            builder = new StringBuilder()//next entry
          }
        } else if (chr == quote) {
          if (noQuote) {   //it's the escape char
            if (escaped) {
              builder.append(quote);
            }
            escaped = !escaped;
          } else {
            if (qualified) {
              qualified = false;
            } else {
              if (wasQualified) {
                qualified = true;
                builder.append(chr);
              } else {
                if (builder.toString().trim().length() != 0) {
                  throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.character_not_allowed", textLine, systemId)); //$NON-NLS-1$
                }
                qualified = true;
                builder = new StringBuilder(); //start the entry over
                wasQualified = true;
              }
            }
          }
        } else {
          if (escaped) {
            //don't understand other escape sequences yet
            throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.unknown_escape", chr, textLine, systemId)); //$NON-NLS-1$
          }
          if (wasQualified && !qualified) {
            if (!Character.isWhitespace(chr)) {
              throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.character_not_allowed", textLine, systemId)); //$NON-NLS-1$
            }
            //else just ignore
          } else {
            builder.append(chr);
          }
        }
      }
      line = null;
    }
  }

  private void addValue(ArrayList<String> result, boolean wasQualified, String val) {
    if (!wasQualified) {
      val = val.trim();
      if (val.length() == 0) {
        val = null;
      }
    }
    result.add(val);
  }

  private List<String> parseFixedWidth(String line) {
    ArrayList<String> result = new ArrayList<String>();
    int beginIndex = 0;
    for (TextColumn col : table.getColumns()) {
      String val = new String(line.substring(beginIndex, beginIndex + col.getWidth()));
      addValue(result, false, val);
      beginIndex += col.getWidth();
    }
    return result;
  }
 
}
TOP

Related Classes of org.teiid.query.processor.relational.TextTableNode

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.