Package org.teiid.query.processor.relational

Source Code of org.teiid.query.processor.relational.EnhancedSortMergeJoinStrategy

/*
* JBoss, Home of Professional Open Source.
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership.  Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA.
*/

package org.teiid.query.processor.relational;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.teiid.common.buffer.IndexedTupleSource;
import org.teiid.common.buffer.STree;
import org.teiid.common.buffer.TupleBrowser;
import org.teiid.common.buffer.TupleSource;
import org.teiid.common.buffer.BufferManager.BufferReserveMode;
import org.teiid.common.buffer.STree.InsertMode;
import org.teiid.core.TeiidComponentException;
import org.teiid.core.TeiidProcessingException;
import org.teiid.core.types.DataTypeManager;
import org.teiid.query.optimizer.relational.rules.NewCalculateCostUtil;
import org.teiid.query.processor.CollectionTupleSource;
import org.teiid.query.sql.lang.OrderBy;
import org.teiid.query.sql.symbol.ElementSymbol;
import org.teiid.query.sql.symbol.SingleElementSymbol;


/**
* Extends the basic fully sorted merge join to check for conditions necessary
* to not fully sort one of the sides.
*
* Will be used for inner joins and only if both sorts are not required.
* Degrades to a normal merge join if the tuples are balanced.
*
* Refined in 7.4 to use a full index if it is small enough or a repeated merge, rather than a partitioning approach (which was really just a single level index)
*/
public class EnhancedSortMergeJoinStrategy extends MergeJoinStrategy {
 
  // Iterator over the not-sorted side; obtained lazily in process() for the index scan.
  private TupleSource currentSource;
  // The side that is indexed (index scan) or sorted if needed (repeated merge).
  private SourceState sortedSource;
  // The side that is read without a full sort and matched against sortedSource.
  private SourceState notSortedSource;
  // Tuple most recently read from currentSource; null when the next one must be fetched.
  private List<?> currentTuple;
  // Browser over the index, created in process() from the join key of currentTuple.
  private TupleBrowser tb;
  // Value returned by reserveBuffers in shouldIndex; handed back in releaseReserved().
  private int reserved;
  // Index built by createIndex; removed in close().
  private STree index;
  // Projection used in process() to restore an index tuple to the source's original column order.
  private int[] reverseIndexes;
  // Index tuple currently being joined with currentTuple; null when the next one must be fetched.
  private List<?> sortedTuple;
  // Set by shouldIndex when it chooses not to build an index; process() then repeats the
  // inherited merge while the not-sorted side still has a buffer.
  private boolean repeatedMerge;
 
  /**
   * Number of index batches we'll allow to be marked as prefers memory regardless of buffer space
   */
  private int preferMemCutoff = 8;

  /**
   * Creates the strategy with the sort handling requested for each side.
   *
   * @param sortLeft sort option for the left side, passed to the superclass
   * @param sortRight sort option for the right side, passed to the superclass
   */
  public EnhancedSortMergeJoinStrategy(SortOption sortLeft, SortOption sortRight) {
    // NOTE(review): the meaning of the third superclass argument is not visible here - confirm against MergeJoinStrategy
    super(sortLeft, sortRight, false);
  }
 
  public void setPreferMemCutoff(int cutoff) {
    this.preferMemCutoff = cutoff;
  }
 
    @Override
    public void close() {
      if (joinNode == null) {
        return;
      }
      super.close();
      if (this.index != null) {
        this.index.remove();
      }
      releaseReserved();
      this.index = null;
      this.tb = null;
      this.currentSource = null;
      this.sortedSource = null;
      this.notSortedSource = null;
      this.sortedTuple = null;
      this.reverseIndexes = null;
    }
   
    /**
     * Create an index of the smaller size
     * 
     * TODO: reuse existing temp table indexes
     */
    public void createIndex(SourceState state, boolean sorted) throws TeiidComponentException, TeiidProcessingException {
      int keyLength = state.getExpressionIndexes().length;
      List elements = state.getSource().getOutputElements();

      //TODO: minimize reordering, or at least detect when it's not necessary
      int[] reorderedSortIndex = Arrays.copyOf(state.getExpressionIndexes(), elements.size());
      Set<Integer> used = new HashSet<Integer>();
      for (int i : state.getExpressionIndexes()) {
      used.add(i);
      }
      int j = state.getExpressionIndexes().length;
      for (int i = 0; i < elements.size(); i++) {
        if (!used.contains(i)) {
          reorderedSortIndex[j++] = i;
        }
      }
      List<SingleElementSymbol> reordered = RelationalNode.projectTuple(reorderedSortIndex, elements);
      if (!state.isDistinct()) {
        //need to add a rowid, just in case
        reordered = new ArrayList<SingleElementSymbol>(reordered);
        ElementSymbol id = new ElementSymbol("rowId"); //$NON-NLS-1$
        id.setType(DataTypeManager.DefaultDataClasses.INTEGER);
        reordered.add(keyLength, id);
        keyLength++;
      }
      index = this.joinNode.getBufferManager().createSTree(reordered, this.joinNode.getConnectionID(), keyLength);
      index.setPreferMemory(true);
      if (!state.isDistinct()) {
        index.getComparator().setDistinctIndex(keyLength-2);
      }
      IndexedTupleSource its = state.getTupleBuffer().createIndexedTupleSource(!joinNode.isDependent());
      int rowId = 0;
      List<?> lastTuple = null;
      boolean sortedDistinct = sorted && !state.isDistinct();
      int sizeHint = index.getExpectedHeight(state.getTupleBuffer().getRowCount());
      outer: while (its.hasNext()) {
        //detect if sorted and distinct
        List<?> originalTuple = its.nextTuple();
        //remove the tuple if it has null
        for (int i : state.getExpressionIndexes()) {
          if (originalTuple.get(i) == null) {
            continue outer;
          }
        }
        if (sortedDistinct && lastTuple != null && this.compare(lastTuple, originalTuple, state.getExpressionIndexes(), state.getExpressionIndexes()) == 0) {
          sortedDistinct = false;
        }
        lastTuple = originalTuple;
        List<Object> tuple = (List<Object>) RelationalNode.projectTuple(reorderedSortIndex, originalTuple);
        if (!state.isDistinct()) {
          tuple.add(keyLength - 1, rowId++);
        }
        index.insert(tuple, sorted?InsertMode.ORDERED:InsertMode.NEW, sizeHint);
      }
      if (!sorted) {
        index.compact();
      }
      its.closeSource();
      this.reverseIndexes = new int[elements.size()];
      for (int i = 0; i < reverseIndexes.length; i++) {
        int oldIndex = reorderedSortIndex[i];
        this.reverseIndexes[oldIndex] = i + (!state.isDistinct()&&i>=keyLength-1?1:0);
      }
      if (!state.isDistinct()
          && ((!sorted && index.getComparator().isDistinct()) || (sorted && sortedDistinct))) {
        this.index.removeRowIdFromKey();
        state.markDistinct(true);
      }
    }
   
    @Override
    protected void loadLeft() throws TeiidComponentException,
        TeiidProcessingException {
      if (this.joinNode.isDependent()) {
          this.leftSource.getTupleBuffer();
      }
    }
   
    private boolean shouldIndexIfSmall(SourceState source) throws TeiidComponentException, TeiidProcessingException {
      Number cardinality = source.getSource().getEstimateNodeCardinality();
      return (source.hasBuffer() || (cardinality != null && cardinality.floatValue() != NewCalculateCostUtil.UNKNOWN_VALUE && cardinality.floatValue() <= this.joinNode.getBatchSize()))
      && (source.getRowCount() <= this.joinNode.getBatchSize());
    }
   
    /**
     * Decides how the join will be processed and loads the sources accordingly.
     *
     * A side whose sort option is switched to NOT_SORTED becomes the
     * notSortedSource; the opposite side becomes the sortedSource and is either
     * indexed via createIndex or, when repeatedMerge was chosen, sorted if needed
     * for a repeated merge. If neither side ends up NOT_SORTED the inherited
     * loading is used and the strategy behaves as a normal merge join.
     */
    @Override
    protected void loadRight() throws TeiidComponentException,
        TeiidProcessingException {
      //the checks are done in a particular order to ensure we don't buffer if possible
      if (processingSortRight == SortOption.SORT && shouldIndexIfSmall(this.leftSource)) {
        //left is small: leave the right side unsorted
        this.processingSortRight = SortOption.NOT_SORTED;
      } else if (!this.leftSource.hasBuffer() && processingSortLeft == SortOption.SORT && shouldIndexIfSmall(this.rightSource)) {
        //right is small and left has no buffer yet: leave the left side unsorted
        this.processingSortLeft = SortOption.NOT_SORTED;
      } else {
        //neither cheap check applied; get the left buffer and compare the sides
        this.leftSource.getTupleBuffer();
        if (!this.rightSource.hasBuffer() && processingSortRight == SortOption.SORT && shouldIndexIfSmall(this.leftSource)) {
            this.processingSortRight = SortOption.NOT_SORTED;
          } else if (processingSortRight == SortOption.SORT && shouldIndex(this.leftSource, this.rightSource)) {
          this.processingSortRight = SortOption.NOT_SORTED;
        } else if (processingSortLeft == SortOption.SORT && shouldIndex(this.rightSource, this.leftSource)) {
          this.processingSortLeft = SortOption.NOT_SORTED;
        }
      }
      if (this.processingSortLeft != SortOption.NOT_SORTED && this.processingSortRight != SortOption.NOT_SORTED) {
        super.loadRight();
        super.loadLeft();
        return; //degrade to merge join
      }
        if (this.processingSortLeft == SortOption.NOT_SORTED) {
          //left is read unsorted; right is the indexed/sorted side
          this.sortedSource = this.rightSource;
          this.notSortedSource = this.leftSource;

          if (!repeatedMerge) {
            createIndex(this.rightSource, this.processingSortRight == SortOption.ALREADY_SORTED);
          } else {
            super.loadRight(); //sort if needed
            this.notSortedSource.sort(SortOption.NOT_SORTED); //do a single sort pass
          }
        } else if (this.processingSortRight == SortOption.NOT_SORTED) {
          //right is read unsorted; left is the indexed/sorted side
          this.sortedSource = this.leftSource;
          this.notSortedSource = this.rightSource;

          if (!repeatedMerge) {
            createIndex(this.leftSource, this.processingSortLeft == SortOption.ALREADY_SORTED);
          } else {
            super.loadLeft(); //sort if needed
            this.notSortedSource.sort(SortOption.NOT_SORTED); //do a single sort pass
          }
        }
    }
   
    private boolean shouldIndex(SourceState possibleIndex, SourceState other) throws TeiidComponentException, TeiidProcessingException {
      if (possibleIndex.getRowCount() * 4 > other.getRowCount()) {
        return false; //index is too large
      }
      int schemaSize = this.joinNode.getBufferManager().getSchemaSize(other.getSource().getOutputElements());
      int toReserve = this.joinNode.getBufferManager().getMaxProcessingKB();
      //check if the other side can be sorted in memory
      if (other.getRowCount() <= this.joinNode.getBatchSize()
          || (possibleIndex.getRowCount() > this.joinNode.getBatchSize() && other.getRowCount()/this.joinNode.getBatchSize() < toReserve/schemaSize)) {
        return false;
      }
      boolean useIndex = false;
      int indexSchemaSize = this.joinNode.getBufferManager().getSchemaSize(possibleIndex.getSource().getOutputElements());
      //approximate that 1/2 of the index will be memory resident
      toReserve = (int)(indexSchemaSize * possibleIndex.getTupleBuffer().getRowCount() / (possibleIndex.getTupleBuffer().getBatchSize() * .5));
      if (toReserve < this.joinNode.getBufferManager().getMaxProcessingKB()) {
        useIndex = true;
      } else if (possibleIndex.getTupleBuffer().getRowCount() / this.joinNode.getBatchSize() < preferMemCutoff) {
        useIndex = true;
      }
      if (useIndex) {
        reserved = this.joinNode.getBufferManager().reserveBuffers(toReserve, BufferReserveMode.FORCE);
        return true;
      }
      this.repeatedMerge = true;
      return true;
    }
   
  private void releaseReserved() {
    this.joinNode.getBufferManager().releaseBuffers(this.reserved);
    this.reserved = 0;
  }
       
    /**
     * Produces the join output.
     *
     * Three modes, chosen by the state set up in loadRight:
     * the inherited merge join when neither side is NOT_SORTED; a repeated
     * inherited merge when repeatedMerge is set; otherwise a scan of the
     * not-sorted side in which each tuple's join key is looked up in the index.
     *
     * The index scan keeps its position in fields (currentSource, currentTuple,
     * tb, sortedTuple) rather than locals.
     * NOTE(review): presumably so a later call can resume after processing is
     * interrupted, e.g. by addBatchRow - confirm against JoinNode.
     */
    @Override
    protected void process() throws TeiidComponentException,
        TeiidProcessingException {
      if (this.processingSortLeft != SortOption.NOT_SORTED && this.processingSortRight != SortOption.NOT_SORTED) {
        super.process();
        return;
      }
      //nothing on the sorted side means nothing can match
      if (this.sortedSource.getTupleBuffer().getRowCount() == 0) {
        return;
      }
      if (repeatedMerge) {
        //run the inherited merge once per buffer of the not-sorted side, rewinding the sorted side each time
        while (this.notSortedSource.hasBuffer()) {
          super.process();
          resetMatchState();
          this.sortedSource.resetState();
          this.notSortedSource.nextBuffer();
        }
        return;
      }
      //else this is a single scan against the index
      if (currentSource == null) {
        currentSource = this.notSortedSource.getIterator();
      }
      while (true) {
        if (this.currentTuple == null) {
          //advance the not-sorted side and open a browser on the index for its join key
          currentTuple = this.currentSource.nextTuple();
          if (currentTuple == null) {
            return;
          }
            List<?> key = RelationalNode.projectTuple(this.notSortedSource.getExpressionIndexes(), this.currentTuple);
            tb = new TupleBrowser(this.index, new CollectionTupleSource(Arrays.asList(key).iterator()), OrderBy.ASC);
        }
        if (sortedTuple == null) {
          sortedTuple = tb.nextTuple();
       
          if (sortedTuple == null) {
            //no more index entries for this key; move on to the next not-sorted tuple
            currentTuple = null;
            continue;
          }
        }
        //restore the index tuple to the original column order of its source
        List<?> reorderedTuple = RelationalNode.projectTuple(reverseIndexes, sortedTuple);
      //keep left-side values first: the not-sorted tuple leads only when the left side is the not-sorted one
      List outputTuple = outputTuple(this.processingSortLeft==SortOption.NOT_SORTED?currentTuple:reorderedTuple,
          this.processingSortLeft==SortOption.NOT_SORTED?reorderedTuple:currentTuple);
      boolean matches = this.joinNode.matchesCriteria(outputTuple);
          //cleared before the row is emitted so the same index tuple is not joined again
          this.sortedTuple = null;
          if (matches) {
            this.joinNode.addBatchRow(outputTuple);
          }
      }
    }
   
    @Override
    public EnhancedSortMergeJoinStrategy clone() {
      return new EnhancedSortMergeJoinStrategy(this.sortLeft, this.sortRight);
    }
   
    /**
     * @return the fixed display name of this join strategy
     */
    @Override
    public String getName() {
      return "ENHANCED SORT JOIN"; //$NON-NLS-1$
    }
        
}
TOP

Related Classes of org.teiid.query.processor.relational.EnhancedSortMergeJoinStrategy

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.