// Package bgu.bio.adt.rna
//
// Source Code of bgu.bio.adt.rna.RNASpecificTree$ShuffleData

package bgu.bio.adt.rna;

import gnu.trove.list.array.TCharArrayList;
import gnu.trove.list.array.TIntArrayList;

import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import java.util.Stack;

import bgu.bio.adt.graphs.Tree;
import bgu.bio.adt.queue.Pool;
import bgu.bio.ds.rna.RNA;
import bgu.bio.util.Filter;

/**
* The Class RNASpecificTree is a tree representation of an RNA secondary
* structure. In this representation each base-pair is a node, each interval
* of unpaired bases is a node, and each loop is a node. The supported loops
* are hairpin, multi, internal and external. This tree class is not
* thread-safe: multiple threads can read it, but some methods (e.g.
* {@link #shuffle()}) may change the internal state of the tree.
*/
public class RNASpecificTree extends Tree {

  /** the identity number of the tree. */
  private int id;

  /**
   * The number of stems in the tree. a stem is a stacking of base-pairs
   * without any internal loops or bulges.
   */
  protected int numOfStems;

  /** The name of the tree. */
  protected String name;

  /** Used to save time and memory in the {@link #shuffle()} methods */
  private TIntArrayList openNodes;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private TIntArrayList mlNodes;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private TIntArrayList bpNodes;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private TIntArrayList inNodes;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private TIntArrayList oneEndElements;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private TCharArrayList intervalChars;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private ArrayList<ShuffleData> complexEndElements;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private Pool<ShuffleData> pool;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private int[] pos;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private TIntArrayList stemsStarts;
  /** Used to save time and memory in the {@link #shuffle()} methods */
  private TIntArrayList stemsEnds;

  /**
   * This array holds the sizes of the stems in the original tree. The field
   * is used in the {@link #shuffle()} methods.
   */
  private int[] stemSizes;
  /**
   * This array holds the size of each interval in the original tree. The
   * field is used in the {@link #shuffle()} methods.
   */
  int[] intervalSizes;

  /**
   * Instantiates a new RNA specific tree.
   */
  public RNASpecificTree() {
    super();
  }

  /**
   * Instantiates a new rNA specific tree.
   *
   * @param name
   *            the name of the tree
   */
  public RNASpecificTree(String name) {
    this();
    this.name = name;
  }

  public boolean buildFromViennaFormat(char[] sequence, char[] structure,
      boolean validate, boolean removeDangling) {
    if (!validate)
      buildFromViennaFormat(sequence, structure, removeDangling);

    if (this.validate(sequence, structure)) {
      try {
        buildFromViennaFormat(sequence, structure, removeDangling);
        return true;
      } catch (RuntimeException e) {
        throw new RuntimeException("Error in parsing tree", e);
      }
    }
    return false;
  }

  private boolean validate(char[] sequence, char[] structure) {
    if (sequence == null || structure == null
        || sequence.length != structure.length)
      return false;

    TIntArrayList stack = new TIntArrayList();
    for (int i = 0; i < structure.length; i++) {
      if (structure[i] == '.') {
        // do nothing
      } else if (structure[i] == '(') {
        stack.add(1);
      } else if (structure[i] == ')') {
        if (stack.size() == 0)
          return false;
        remove(stack);
      } else {
        // saw something else
        return false;
      }
    }
    return true;
  }

  /**
   * Builds the RNA tree from Vienna bracket format. assumes that the input is
   * legal and in the format of Structure using only the '(', ')' and '.'
   * symbols
   *
   * @param sequence
   *            the sequence
   * @param structure
   *            the structure
   */
  public void buildFromViennaFormat(char[] sequence, char[] structure) {
    buildFromViennaFormat(sequence, structure, false);
  }

  /**
   * Builds the RNA tree from Vienna bracket format. assumes that the input is
   * legal and in the format of Structure using only the '(', ')' and '.'
   * symbols
   *
   * @param sequence
   *            the sequence
   * @param structure
   *            the structure
   * @param removeDangling
   *            should remove dangling from the tree
   */
  public void buildFromViennaFormat(char[] sequence, char[] structure,
      boolean removeDangling) {
    numOfStems = 0;
    TIntArrayList leftBrackets = new TIntArrayList();

    int[] pairs = new int[sequence.length];
    int[] branches = new int[sequence.length];

    // choose start and end positions for the tree
    final char OPEN_BRACKET = '(';
    final char CLOSE_BRACKET = ')';
   
    final int seqStart = removeDangling ? indexOf(structure, OPEN_BRACKET) : 0;
   
    final int seqEnd = removeDangling ? lastIndexOf(structure, CLOSE_BRACKET) + 1
        : sequence.length;

    //part 1
    for (int seqIx = seqStart; seqIx < seqEnd; ++seqIx) {
      if (structure[seqIx] == CLOSE_BRACKET) {
        int left = leftBrackets.removeAt(leftBrackets.size() - 1);
        pairs[left] = seqIx;
        pairs[seqIx] = left;
        if (!leftBrackets.isEmpty()) {
          ++branches[leftBrackets.get(leftBrackets.size() - 1)];
        }
      } else if (structure[seqIx] == OPEN_BRACKET) {
        leftBrackets.add(seqIx);
      } else {
        pairs[seqIx] = -1;
        branches[seqIx] = -1;
      }
    }
    // part 2

    int seqIx = seqStart;
    int nodeIx = 2;

    TIntArrayList stack = new TIntArrayList();
    ArrayList<TIntArrayList> nodes = new ArrayList<TIntArrayList>();
    ArrayList<RNANodeLabel> tmpLabelsList = new ArrayList<RNANodeLabel>();
    nodes.add(new TIntArrayList());
    tmpLabelsList.add(new RNANodeLabel(RNANodeLabel.ROOT,
        new TCharArrayList("ROOT".toCharArray()), -1, -1, -1));

    nodes.add(new TIntArrayList());
    stack.add(1);
    tmpLabelsList.add(new RNANodeLabel(RNANodeLabel.EXTERNAL,
        new TCharArrayList("EXT".toCharArray()), -1, -1, -1));
    // connect root to external
    connectNodes(nodes, 0, 1);
    TIntArrayList tmpStemSizes = new TIntArrayList();
    TIntArrayList tmpIntervalSizes = new TIntArrayList();
    tmpStemSizes.add(0);

    int group = 0;
    while (seqIx < seqEnd) {
      if (structure[seqIx] == CLOSE_BRACKET) {
        while (tmpLabelsList.get(peek(stack)).getType() != RNANodeLabel.BASE_PAIR) {
          remove(stack);
        }
        remove(stack);
        seqIx++;
      } else if (structure[seqIx] == OPEN_BRACKET) {
        nodes.add(new TIntArrayList());
        // check if parent is ML or EXT then change the group id
        if (tmpLabelsList.get(peek(stack)).getType() != RNANodeLabel.BASE_PAIR
            && tmpLabelsList.get(peek(stack)).getType() != RNANodeLabel.IL_LOOP) {
          group++;
        }
        tmpLabelsList.add(new RNANodeLabel(RNANodeLabel.BASE_PAIR,
            new TCharArrayList(new char[] { sequence[seqIx],
                sequence[pairs[seqIx]] }), seqIx + 1,
            pairs[seqIx] + 1, group));
        tmpStemSizes.set(tmpStemSizes.size() - 1,
            tmpStemSizes.get(tmpStemSizes.size() - 1) + 1);
        // connect to head
        connectNodes(nodes, nodeIx, peek(stack));
        stack.add(nodeIx);
        nodeIx++;

        if (pairs[seqIx + 1] != pairs[seqIx] - 1) {
          // not part of stack
          nodes.add(new TIntArrayList());
          int type = RNANodeLabel.HP_LOOP;
          String typeName = "HP";
          if (branches[seqIx] == 1) {
            type = RNANodeLabel.IL_LOOP;
            typeName = "IL";
          } else if (branches[seqIx] > 1) {
            type = RNANodeLabel.ML_LOOP;
            typeName = "ML";
            group++;
          }
          tmpStemSizes.add(0);
          numOfStems++;

          tmpLabelsList.add(new RNANodeLabel(type,
              new TCharArrayList(typeName.toCharArray()), -1, -1,
              group));
          // connect to head
          connectNodes(nodes, nodeIx, peek(stack));
          stack.add(nodeIx);
          nodeIx++;
        }
        seqIx++;
      } else {
        // base interval
        int start = seqIx;
        while (seqIx < sequence.length
            && (structure[seqIx] != OPEN_BRACKET && structure[seqIx] != CLOSE_BRACKET)) {
          seqIx++;
        }

        // create an interval node
        nodes.add(new TIntArrayList());
        char[] label = new char[seqIx - start];
        System.arraycopy(sequence, start, label, 0, label.length);
        tmpLabelsList.add(new RNANodeLabel(RNANodeLabel.BASE_INTERVAL,
            new TCharArrayList(label), start + 1, seqIx, group));
        tmpIntervalSizes.add(label.length);
        // connect to head
        connectNodes(nodes, nodeIx, peek(stack));
        nodeIx++;
      }
    }

    // copy labels
    NodeLabel[] labelsArray = new RNANodeLabel[tmpLabelsList.size()];
    for (int i = 0; i < tmpLabelsList.size(); i++) {
      labelsArray[i] = tmpLabelsList.get(i);
    }

    // copy edges
    int[][] edges = new int[nodes.size()][];
    for (int i = 0; i < nodes.size(); i++) {
      TIntArrayList list = nodes.get(i);
      edges[i] = list.toArray();
    }

    init(edges, labelsArray);
    this.stemSizes = new int[numOfStems];
    for (int i = 0; i < this.stemSizes.length; i++) {
      stemSizes[i] = tmpStemSizes.get(i);
    }
    this.intervalSizes = tmpIntervalSizes.toArray();
  }

  private int lastIndexOf(char[] structure, char c) {
    for (int i = structure.length - 1; i >= 0; i--) {
      if (structure[i] == c)
        return i;
    }
    return -1;
  }

  private int indexOf(char[] structure, char c) {
    for (int i = 0; i < structure.length; i++) {
      if (structure[i] == c)
        return i;
    }
    return -1;
  }

  /**
   * Peek. Internal method used to use {@link TIntArrayList} as {@link Stack}
   * for primitive types
   *
   * @param stack
   *            the stack
   * @return the number on top of the stack
   */
  private int peek(TIntArrayList stack) {
    return stack.get(stack.size() - 1);
  }

  /**
   * Remove. Internal method used to use {@link TIntArrayList} as
   * {@link Stack} for primitve types
   *
   * @param stack
   *            the stack
   * @return the number on top of the list
   */
  private int remove(TIntArrayList stack) {
    return stack.removeAt(stack.size() - 1);
  }

  /**
   * Connect nodes. this method adds nodes to each other lists
   *
   * @param nodes
   *            the nodes
   * @param id1
   *            the id of the first node
   * @param id2
   *            the id of the second node
   */
  private void connectNodes(ArrayList<TIntArrayList> nodes, int id1, int id2) {
    nodes.get(id1).add(id2);
    nodes.get(id2).add(id1);
  }

  /**
   * Calculates the groups of the nodes according to the tree. two nodes
   * belong to the same group if and only if they belong in the same stem.
   */
  public void groupByStructure() {
    int rootID = -1;
    for (int i = 0; i < nodeNum; i++) {
      ((RNANodeLabel) labels[i]).setGroup(-1);
      if (labels[i].getType() == RNANodeLabel.ROOT) {
        rootID = i;
      }
    }
    groupByStructure(rootID, 0);
  }

  private int groupByStructure(int nodeId, int group) {
    RNANodeLabel currentLabel = (RNANodeLabel) labels[nodeId];
    (currentLabel).setGroup(group);
    int deg = outDeg(nodeId);
    for (int i = 0; i < deg; i++) {
      RNANodeLabel l = (RNANodeLabel) labels[getNeighbor(nodeId, i)];
      if (l.getGroup() == -1) {
        // if not set
        if (!((currentLabel.getType() == RNANodeLabel.BASE_PAIR && l
            .getType() == RNANodeLabel.BASE_PAIR)
            || (currentLabel.getType() == RNANodeLabel.HP_LOOP && l
                .getType() == RNANodeLabel.BASE_INTERVAL) || (currentLabel
            .getType() == RNANodeLabel.BASE_PAIR && l.getType() == RNANodeLabel.HP_LOOP))) {
          group++;
        }
        group = groupByStructure(getNeighbor(nodeId, i), group);
      }
    }
    return group;
  }

  public void shuffle() {
    shuffle(new Random());
  }

  /**
   * Shuffles the tree in place using the given random source.
   * <p>
   * The node ids are first sorted into work lists by label type (base-pair,
   * base interval, multi-loop, and one list for every other loop type), and
   * the characters of all the intervals are pooled. The lists and the pool
   * are shuffled and the adjacency lists are then rewritten: the EXT node
   * may swap roles with a multi-loop node, the interval characters and the
   * base-pairs are dealt out again (every interval gets at least one
   * character and every stem at least one base-pair), each stem is attached
   * to a loop node, the loop nodes are attached to each other and, last,
   * the EXT node is reconnected.
   * <p>
   * Reads {@code stemSizes}, {@code intervalSizes} and {@code numOfStems},
   * which are assigned while the tree is built. NOTE(review): calling this
   * on a tree that was never built looks unsupported - confirm.
   *
   * @param rand
   *            the source of every random choice made by this method
   */
  public void shuffle(Random rand) {
    createLists();

    // defaults, replaced below when a ROOT / EXTERNAL label is found
    int rootId = 0;
    int externalId = 1;

    // copy data to lists
    for (int i = 0; i < this.nodeNum; i++) {
      if (labels[i].getType() == RNANodeLabel.BASE_PAIR) {
        bpNodes.add(i);
      } else if (labels[i].getType() == RNANodeLabel.BASE_INTERVAL) {
        inNodes.add(i);
        // copy all the chars to the pool of chars
        TCharArrayList list = labels[i].getLabelValue();
        for (int c = 0; c < list.size(); c++) {
          intervalChars.add(list.get(c));
        }
      } else if (labels[i].getType() == RNANodeLabel.EXTERNAL) {
        externalId = i;
      } else if (labels[i].getType() == RNANodeLabel.ROOT) {
        rootId = i;
      } else if (labels[i].getType() == RNANodeLabel.ML_LOOP) {
        mlNodes.add(i);
      } else {
        // every remaining type ends up here
        openNodes.add(i);
      }
    }

    intervalChars.shuffle(rand);
    shuffleList(inNodes, rand);
    shuffleList(bpNodes, rand);
    shuffleList(openNodes, rand);
    shuffleList(mlNodes, rand);

    // shuffle the EXT node
    if (mlNodes.size() != 0) {
      // draw from the multi-loop nodes plus one extra outcome; the extra
      // outcome (index == mlNodes.size()) keeps the current EXT node
      final int index = rand.nextInt(mlNodes.size() + 1);
      if (index != mlNodes.size()) {
        // if the selected node is different then the old one, replace
        // their data and save the new one as the EXT
        final int selected = mlNodes.removeAt(index);
        // find the first neighbor slot of the selected node that holds a
        // base-pair
        // NOTE(review): presumably the pair that closes the multi-loop -
        // confirm; bpLocation stays -1 if no neighbor is a base-pair
        int bpLocation = -1;
        for (int i = 0; i < outDeg(selected) && bpLocation == -1; i++) {
          if (labels[outAdjLists[selected][i]].getType() == RNANodeLabel.BASE_PAIR) {
            bpLocation = i;
          }
        }
        // switch the neighbors information: the selected node now points
        // to the root where it pointed to that base-pair, and the old EXT
        // node points to the base-pair where it pointed to the root
        int tmp = outAdjLists[selected][bpLocation];
        outAdjLists[selected][bpLocation] = rootId;
        outAdjLists[externalId][getNeighborIx(externalId, rootId)] = tmp;
        // switch type
        labels[selected].setType(RNANodeLabel.EXTERNAL);
        labels[externalId].setType(RNANodeLabel.ML_LOOP);
        // switch type data
        TCharArrayList tmpList = labels[externalId].getLabelValue();
        labels[externalId].setLabelValue(labels[selected]
            .getLabelValue());
        labels[selected].setLabelValue(tmpList);
        // add the old one back to the list and mark the selected as the
        // new EXT node
        mlNodes.add(externalId);
        externalId = selected;
      }
    }

    // move the rest mlNodes to the openNodes
    while (mlNodes.size() != 0) {
      openNodes.add(mlNodes.removeAt(mlNodes.size() - 1));
    }

    // total number of interval characters, summed from the current labels
    int intervalSum = 0;
    // divide the interval data
    for (int i = 0; i < inNodes.size(); i++) {
      TCharArrayList list = labels[inNodes.get(i)].getLabelValue();
      intervalSum += list.size();
      list.resetQuick();
      // add at least one char for each interval
      list.add(intervalChars.removeAt(intervalChars.size() - 1));
    }

    // divide the rest of the data: every remaining character goes to an
    // interval picked by walking the running total of intervalSizes up to
    // a random value in 1..intervalSum
    // NOTE(review): assumes intervalSizes adds up to intervalSum - confirm
    while (intervalChars.size() > 0) {
      final int val = rand.nextInt(intervalSum) + 1;
      int currentSum = intervalSizes[0];
      int chosenInterval = 0;
      while (currentSum < val) {
        chosenInterval++;
        currentSum += intervalSizes[chosenInterval];
      }
      final char c = intervalChars.removeAt(intervalChars.size() - 1);
      labels[inNodes.get(chosenInterval)].getLabelValue().add(c);
    }

    // pos[v] is the next slot of outAdjLists[v] to be written; all the
    // slots are rewritten from 0 on
    if (pos == null || pos.length != nodeNum) {
      pos = new int[nodeNum];
    } else {
      Arrays.fill(pos, 0);
    }

    // first and last base-pair node of every stem
    if (stemsStarts == null) {
      stemsStarts = new TIntArrayList();
      stemsEnds = new TIntArrayList();
    } else {
      stemsStarts.resetQuick();
      stemsEnds.resetQuick();
    }

    // give each stem the minimum amount: one base-pair node, which is both
    // the start and the end of the stem for now
    final int stemSum = bpNodes.size();
    for (int i = 0; i < numOfStems; i++) {
      final int node = remove(bpNodes);
      stemsStarts.add(node);
      stemsEnds.add(node);
    }

    if (complexEndElements == null) {
      complexEndElements = new ArrayList<ShuffleData>();
      pool = new Pool<ShuffleData>();
    } else {
      complexEndElements.clear();
    }

    // divide the rest of the data: every remaining base-pair node extends
    // a stem picked by walking the running total of stemSizes up to a
    // random value in 1..stemSum
    // NOTE(review): assumes stemSizes adds up to stemSum - confirm
    while (bpNodes.size() > 0) {
      final int val = rand.nextInt(stemSum) + 1;
      int currentSum = stemSizes[0];
      int chosen = 0;
      while (currentSum < val) {
        chosen++;
        currentSum += stemSizes[chosen];
      }
      final int current = bpNodes.removeAt(bpNodes.size() - 1);
      int last = stemsEnds.get(chosen);
      // link the node behind the current end of the stem and make it the
      // new end
      connectNodes(pos, last, current);
      stemsEnds.set(chosen, current);
    }

    // attach every stem to a loop node and fill the slots of that node
    for (int i = 0; i < numOfStems; i++) {
      final int start = stemsStarts.get(i);
      final int last = stemsEnds.get(i);

      // attach the stem to a open end node
      final int element = remove(openNodes);
      // connect one and count the rest
      final int deg = outDeg(element);
      boolean foundOne = false;

      // create shuffle data information, reusing a pooled object when one
      // is available
      TIntArrayList temp = null;
      ShuffleData data = pool.dequeue();
      if (data == null) {
        temp = new TIntArrayList();
        data = new ShuffleData(start, element, temp);
      } else {
        data.reset(start, element);
        temp = data.positions;
      }

      for (int d = 0; d < deg; d++) {
        // check the type of the node that the next slot pointed to so
        // far; the slot gets a new node of the same type
        // NOTE(review): a slot of any other type is not advanced here -
        // confirm that this cannot happen for a loop node
        switch (labels[getNeighbor(element, pos[element])].getType()) {

        case RNANodeLabel.BASE_INTERVAL:
          connectNodes(pos, element,
              inNodes.removeAt(inNodes.size() - 1));
          break;

        case RNANodeLabel.BASE_PAIR:
          if (!foundOne) {
            // the first base-pair slot takes the end of the stem
            connectNodes(pos, element, last);
            foundOne = true;
          } else {
            // any further base-pair slot is left open (-1) and
            // remembered, it is filled in the next phase
            outAdjLists[element][pos[element]] = -1;
            temp.add(pos[element]);
            pos[element]++;
          }
          break;
        }
      }
      // if the list is not empty then put in complex
      if (temp.size() != 0) {
        complexEndElements.add(data);
      } else {
        // nothing left open: the start of the stem can be attached to
        // another node as is
        oneEndElements.add(start);
        // return un-needed data to pool
        pool.enqueue(data);
      }

    }

    // fill the slots that were left open with randomly chosen attachable
    // stems; once all its slots are filled the element is attachable too
    while (complexEndElements.size() != 0) {
      ShuffleData current = complexEndElements.remove(complexEndElements
          .size() - 1);
      final int nodeId = current.nodeId;
      for (int i = 0; i < current.positions.size(); i++) {
        final int toNode = oneEndElements.removeAt(rand
            .nextInt(oneEndElements.size()));
        outAdjLists[nodeId][current.positions.get(i)] = toNode;
        outAdjLists[toNode][pos[toNode]] = nodeId;
        pos[toNode]++;
      }

      oneEndElements.add(current.openEndId);
      // release to pool
      pool.enqueue(current);
    }

    // run on external: refill the slots of the EXT node the same way
    final int deg = outDeg(externalId);
    for (int d = 0; d < deg; d++) {
      // check the type of the node
      switch (labels[getNeighbor(externalId, pos[externalId])].getType()) {

      case RNANodeLabel.BASE_INTERVAL:
        connectNodes(pos, externalId,
            inNodes.removeAt(inNodes.size() - 1));
        break;

      case RNANodeLabel.BASE_PAIR:
        connectNodes(pos, externalId,
            oneEndElements.removeAt(oneEndElements.size() - 1));
        break;
      case RNANodeLabel.ROOT:
        connectNodes(pos, externalId, rootId);
        break;
      }
    }

    // correct the order on the runs
    reBuildInAdjList();
    indexEdges();
  }

  /**
   * Export the tree to vienna format. Useful after {@link #shuffle()} to get
   * the new sequence of the shuffled tree
   *
   * @return the char[][]
   */
  public char[][] toViennaFormat() {
    StringBuilder structure = new StringBuilder();
    StringBuilder sequence = new StringBuilder();

    toViennaFormatRec(-1, 0, sequence, structure);

    return new char[][] { sequence.toString().toCharArray(),
        structure.toString().toCharArray() };
  }

  private void toViennaFormatRec(int from, int node, StringBuilder sequence,
      StringBuilder structure) {
    final RNANodeLabel label = (RNANodeLabel) labels[node];
    if (label.getType() == RNANodeLabel.BASE_PAIR) {
      sequence.append(label.getLabelValue().getQuick(0));
      structure.append('(');
    } else if (label.getType() == RNANodeLabel.BASE_INTERVAL) {
      final int size = label.getLabelValue().size();
      for (int i = 0; i < size; i++) {
        sequence.append(label.getLabelValue().getQuick(i));
        structure.append('.');
      }
    }

    // pass on all neighbors
    for (int i = 0; i < outDeg(node); i++) {
      if (outAdjLists[node][i] != from) {
        toViennaFormatRec(node, outAdjLists[node][i], sequence,
            structure);
      }
    }

    if (label.getType() == RNANodeLabel.BASE_PAIR) {
      sequence.append(label.getLabelValue().getQuick(1));
      structure.append(')');
    }
  }

  public void toFASTAFile(String fileName) throws IOException {
    Writer writer = new FileWriter(fileName);
    toFASTAFile(writer);
    writer.close();
  }

  public void toFASTAFile(Writer writer) throws IOException {
    char[][] ans = this.toViennaFormat();
    writer.write('>');
    writer.write(this.name);
    writer.write('\n');
    writer.write(new String(ans[0]));
    writer.write('\n');
    writer.write(new String(ans[1]));
    writer.write('\n');
  }

  private void createLists() {
    if (openNodes == null) {
      openNodes = new TIntArrayList();
    } else {
      openNodes.resetQuick();
    }
    if (mlNodes == null) {
      mlNodes = new TIntArrayList();
    } else {
      mlNodes.resetQuick();
    }
    if (bpNodes == null) {
      bpNodes = new TIntArrayList();
    } else {
      bpNodes.resetQuick();
    }
    if (inNodes == null) {
      inNodes = new TIntArrayList();
    } else {
      inNodes.resetQuick();
    }
    if (oneEndElements == null) {
      oneEndElements = new TIntArrayList();
    } else {
      oneEndElements.resetQuick();
    }
    if (intervalChars == null) {
      intervalChars = new TCharArrayList();
    } else {
      intervalChars.resetQuick();
    }
  }

  private void connectNodes(int[] positions, final int fromNode,
      final int toNode) {
    outAdjLists[fromNode][positions[fromNode]] = toNode;
    positions[fromNode]++;
    outAdjLists[toNode][positions[toNode]] = fromNode;
    positions[toNode]++;
  }

  private void shuffleList(TIntArrayList list, Random rand) {
    int top = list.size();
    while (top > 0) {
      final int positions = rand.nextInt(top);
      int temp = list.get(top - 1);
      list.set(top - 1, list.get(positions));
      list.set(positions, temp);
      top--;
    }
  }

  public final String getName() {
    return name;
  }

  public final void setName(String name) {
    this.name = name;
  }

  public static ArrayList<RNASpecificTree> loadFromFile(String fileName,
      Filter<RNASpecificTree> filter) {
    return loadFromFile(fileName, filter, false);
  }

  public static ArrayList<RNASpecificTree> loadFromFile(String fileName,
      Filter<RNASpecificTree> filter, boolean printStats) {
    ArrayList<RNASpecificTree> trees = new ArrayList<RNASpecificTree>();
    ArrayList<RNA> list = RNA.loadFromFile(fileName, true);
    final int amountOfTrees = list.size();
    double sumNodes = 0, sumLength = 0;
    int minNodes = Integer.MAX_VALUE, maxNodes = Integer.MIN_VALUE, minLength = Integer.MAX_VALUE, maxLength = Integer.MIN_VALUE;
    for (int i = 0; i < amountOfTrees; i++) {
      RNA rna = list.get(i);
      rna.fixEmptyHairpins();
      RNASpecificTree tree = new RNASpecificTree(rna.getHeader());
      tree.buildFromViennaFormat(rna.getPrimary().toCharArray(), rna
          .getSecondary().toCharArray(), false, false);
      if (filter != null && filter.shouldPass(tree)) {
        trees.add(tree);
        sumNodes += tree.getNodeNum();
        sumLength += rna.getPrimary().length();
        minLength = Math.min(minLength, rna.getPrimary().length());
        minNodes = Math.min(minNodes, tree.getNodeNum());
        maxLength = Math.max(maxLength, rna.getPrimary().length());
        maxNodes = Math.max(maxNodes, tree.getNodeNum());
      }

    }
    if (printStats) {
      System.out.println("Amount of trees before filter: " + list.size());
      System.out.println("Amount of trees after filter: " + trees.size());

      System.out.println("Average node size: "
          + (sumNodes / trees.size()) + " [" + minNodes + " - "
          + maxNodes + "]");
      System.out.println("Average sequence size: "
          + (sumLength / trees.size()) + " [" + minLength + " - "
          + maxLength + "]");
    }
    return trees;
  }

  public static ArrayList<RNASpecificTree> buildRandomDBFromFile(
      String fileName, Filter<RNASpecificTree> filter, int repeat,
      long seed) {
    Random rand = new Random(seed);
    ArrayList<RNASpecificTree> trees = new ArrayList<RNASpecificTree>();
    ArrayList<RNA> list = RNA.loadFromFile(fileName, true);
    final int amountOfTrees = list.size();
    for (int i = 0; i < amountOfTrees; i++) {
      RNA rna = list.get(i);
      rna.fixEmptyHairpins();
      final char[] seq = rna.getPrimary().toCharArray();
      final char[] str = rna.getSecondary().toCharArray();
      RNASpecificTree tree = new RNASpecificTree(rna.getHeader());
      tree.buildFromViennaFormat(seq, str, false, true);
      if (filter != null && filter.shouldPass(tree)) {
        for (int r = 0; r < repeat; r++) {
          Random randTree = new Random(rand.nextLong());
          tree = new RNASpecificTree(rna.getHeader() + "_rand" + r);
          tree.buildFromViennaFormat(seq, str, false, false);
          tree.shuffle(randTree);
          trees.add(tree);
        }
      }

    }
    return trees;
  }

  private class ShuffleData {
    int openEndId;
    int nodeId;
    TIntArrayList positions;

    public ShuffleData(int openEndId, int nodeId, TIntArrayList positions) {
      super();
      this.openEndId = openEndId;
      this.nodeId = nodeId;
      this.positions = positions;
    }

    public void reset(int openEndId, int nodeId) {
      this.openEndId = openEndId;
      this.nodeId = nodeId;
      this.positions.resetQuick();
    }
  }

  @Override
  public String toString() {
    return this.name != null ? this.getName() : super.toString();
  }

  public int getId() {
    return this.id;
  }

  public void setId(int id) {
    this.id = id;
  }
}
// TOP
//
// Related Classes of bgu.bio.adt.rna.RNASpecificTree$ShuffleData
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.