Package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize

Examples of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode


    allNodes.addAll(nonLeaves);
    allNodes.addAll(leaves);

    columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
    for (int i = 0; i < allNodes.size(); ++i) {
      TokenizerNode node = allNodes.get(i);
      columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier));
    }

    // leaf widths are known at this point, so add them up
    int totalBytesWithoutOffsets = 0;
    for (int i = allNodes.size() - 1; i >= 0; --i) {
      ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
      // leaves store all but their first token byte
      totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
    }

    // figure out how wide our offset FInts are
    int parentOffsetWidth = 0;
    while (true) {
      ++parentOffsetWidth;
      int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
      if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
        // it fits
        numBytes = numBytesFinder;
        break;
      }
    }
    if (familyVsQualifier) {
      blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
    } else {
      blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
    }

    int forwardIndex = 0;
    for (int i = 0; i < allNodes.size(); ++i) {
      TokenizerNode node = allNodes.get(i);
      ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
      int fullNodeWidth = columnNodeWriter
          .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
      node.setOutputArrayOffset(forwardIndex);
      columnNodeWriter.setTokenBytes(node.getToken());
      if (node.isRoot()) {
        columnNodeWriter.setParentStartPosition(0);
      } else {
        columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
      }
      forwardIndex += fullNodeWidth;
    }

    tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
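
The while loop above sizes the parent-offset field: it grows the width one byte at a time until the whole column section, payload plus one offset per node, fits inside the largest value that width can represent. A minimal standalone sketch of the same search, assuming maxValueForNumBytes(n) is simply 2^(8*n) - 1 (the real UFIntTool may behave differently):

  // Hedged sketch: smallest offset width (in bytes) such that the section,
  // payload plus one offset per node, still fits in an unsigned int of that width.
  static int findOffsetWidth(int payloadBytes, int numNodes) {
    int width = 0;
    while (true) {
      ++width;
      // assumed UFIntTool behaviour: 2^(8*width) - 1, capped for width >= 8
      long maxEncodable = width >= 8 ? Long.MAX_VALUE : (1L << (8 * width)) - 1;
      long totalBytes = payloadBytes + (long) width * numNodes;
      if (totalBytes < maxEncodable) {
        return width; // it fits
      }
    }
  }

For example, 10000 payload bytes spread over 600 nodes needs a width of 2: 10000 + 2 * 600 = 11200, which fits under 65535.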


    if (fanOut <= 0) {
      return;
    }
    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      os.write(child.getToken().get(0));// first byte of each child's token
    }
  }
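
The fan-out section written above holds one byte per child, the first byte of each child's token, in child order; a reader scans it to decide which child to descend into. A hypothetical reader-side helper (not the HBase decoder) for that lookup:

  // Hedged sketch: pick the child whose token starts with the next key byte.
  // fanOutBytes holds one byte per child, in the order the children were written.
  static int findChildIndex(byte[] fanOutBytes, byte nextKeyByte) {
    for (int i = 0; i < fanOutBytes.length; ++i) {
      if (fanOutBytes[i] == nextKeyByte) {
        return i; // descend into child i
      }
    }
    return -1; // no child token starts with this byte
  }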

  /**
   * If a branch or a nub, the last things we append are the UFInt offsets to the child row nodes.
   */
  protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
      UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
    }
  }
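
Each child offset above is written with UFIntTool.writeBytes(width, value, os), i.e. as an unsigned integer packed into a fixed number of bytes. A hypothetical equivalent writer, assuming big-endian byte order (the real UFIntTool may differ):

  // Hedged sketch of a fixed-width unsigned int writer: emits exactly numBytes
  // bytes, most significant byte first. Not the real UFIntTool implementation.
  static void writeFixedWidthInt(int numBytes, long value, java.io.OutputStream os)
      throws java.io.IOException {
    for (int shift = 8 * (numBytes - 1); shift >= 0; shift -= 8) {
      os.write((int) (value >>> shift) & 0xFF);
    }
  }

Here numBytes would be blockMeta.getNextNodeOffsetWidth() and value the distanceToChild computed in the loop above.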

  @Test
  public void testSearching() {
    for (byte[] input : inputs) {
      TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult();
      builder.getNode(resultHolder, input, 0, input.length);
      TokenizerNode n = resultHolder.getMatchingNode();
      byte[] output = n.getNewByteArray();
      Assert.assertTrue(Bytes.equals(input, output));
    }
  }
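
The test above relies on rebuilding the original byte[] from the matching trie node via n.getNewByteArray(). A self-contained toy illustration of that reconstruction, not the HBase implementation: each node stores only its own token, so the full key is the concatenation of the tokens on the path from the root down to the node.

  // Hypothetical toy node, for illustration only.
  final class ToyNode {
    final ToyNode parent; // null for the root
    final byte[] token;   // bytes this node adds beyond its parent

    ToyNode(ToyNode parent, byte[] token) {
      this.parent = parent;
      this.token = token;
    }

    byte[] rebuildKey() {
      java.util.ArrayDeque<byte[]> path = new java.util.ArrayDeque<>();
      int length = 0;
      for (ToyNode n = this; n != null; n = n.parent) {
        path.addFirst(n.token); // root ends up first
        length += n.token.length;
      }
      byte[] out = new byte[length];
      int pos = 0;
      for (byte[] t : path) {
        System.arraycopy(t, 0, out, pos, t.length);
        pos += t.length;
      }
      return out;
    }
  }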

    // create leaf writer nodes
    // leaf widths are known at this point, so add them up
    int totalLeafBytes = 0;
    for (int i = leaves.size() - 1; i >= 0; --i) {
      TokenizerNode leaf = leaves.get(i);
      RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
      ++numLeafWriters;
      // leaves store all but their first token byte
      int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
      totalLeafBytes += leafNodeWidth;
      negativeIndex += leafNodeWidth;
      leaf.setNegativeIndex(negativeIndex);
    }

    int totalNonLeafBytesWithoutOffsets = 0;
    int totalChildPointers = 0;
    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
      TokenizerNode nonLeaf = nonLeaves.get(i);
      RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
      ++numNonLeafWriters;
      totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
      totalChildPointers += nonLeaf.getNumChildren();
    }

    // figure out how wide our offset FInts are
    int offsetWidth = 0;
    while (true) {
      ++offsetWidth;
      int offsetBytes = totalChildPointers * offsetWidth;
      int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
      if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
        // it fits
        numBytes = totalRowBytes;
        break;
      }
    }
    blockMeta.setNextNodeOffsetWidth(offsetWidth);

    // populate negativeIndexes
    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
      TokenizerNode nonLeaf = nonLeaves.get(i);
      int writerIndex = nonLeaves.size() - i - 1;
      RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
      int nodeWidth = nonLeafWriter.calculateWidth();
      negativeIndex += nodeWidth;
      nonLeaf.setNegativeIndex(negativeIndex);
    }

    return this;
  }
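
The negativeIndex assigned in the loops above counts bytes backwards from the end of the row section: leaves are laid out nearest the end, non-leaves in front of them, and each node's index is the cumulative width of everything at or behind it. The offset written by writeNextRowTrieNodeOffsets is then simply the parent's negativeIndex minus the child's. A hedged sketch of that bookkeeping with hypothetical widths:

  // Hedged sketch: assign negative indexes to nodes visited from the back of
  // the section towards the front, mirroring the loops above.
  static int[] assignNegativeIndexes(int[] widthsBackToFront) {
    int[] negativeIndexes = new int[widthsBackToFront.length];
    int negativeIndex = 0;
    for (int i = 0; i < widthsBackToFront.length; ++i) {
      negativeIndex += widthsBackToFront[i];
      negativeIndexes[i] = negativeIndex;
    }
    return negativeIndexes;
  }

For example, widths {4, 7, 5} (last node first) yield negative indexes {4, 11, 16}; a parent at 16 that points to the child at 4 stores the distance 16 - 4 = 12.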

  protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
      ArrayList<TokenizerNode> ins, boolean leaves) {
    ArrayList<TokenizerNode> outs = Lists.newArrayList();
    for (int i = ins.size() - 1; i >= 0; --i) {
      TokenizerNode n = ins.get(i);
      if ((n.isLeaf() && leaves) || (!n.isLeaf() && !leaves)) {
        outs.add(n);
      }
    }
    return outs;
  }

    allNodes.addAll(nonLeaves);
    allNodes.addAll(leaves);

    columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
    for (int i = 0; i < allNodes.size(); ++i) {
      TokenizerNode node = allNodes.get(i);
      columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
    }

    // leaf widths are known at this point, so add them up
    int totalBytesWithoutOffsets = 0;
    for (int i = allNodes.size() - 1; i >= 0; --i) {
      ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
      // leaves store all but their first token byte
      totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
    }

    // figure out how wide our offset FInts are
    int parentOffsetWidth = 0;
    while (true) {
      ++parentOffsetWidth;
      int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
      if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
        // it fits
        numBytes = numBytesFinder;
        break;
      }
    }
    if (this.nodeType == ColumnNodeType.FAMILY) {
      blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
    } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
      blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
    } else {
      blockMeta.setTagsOffsetWidth(parentOffsetWidth);
    }

    int forwardIndex = 0;
    for (int i = 0; i < allNodes.size(); ++i) {
      TokenizerNode node = allNodes.get(i);
      ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
      int fullNodeWidth = columnNodeWriter
          .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
      node.setOutputArrayOffset(forwardIndex);
      columnNodeWriter.setTokenBytes(node.getToken());
      if (node.isRoot()) {
        columnNodeWriter.setParentStartPosition(0);
      } else {
        columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
      }
      forwardIndex += fullNodeWidth;
    }

    tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
