Package gem.util

Source Code of gem.util.SetUtils

package gem.util;

import gem.Difference;
import gem.StageAnalyzer;
import gem.Triplet;
import gem.parser.TabDelimitedFileParser;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.*;

/**
* @author Ozgun Babur
*/
public class SetUtils
{
  public static void printVenn(Set s1, Set s2)
  {
    int c = 0;

    for (Object o : s1)
    {
      if (s2.contains(o))
      {
        c++;
//        System.out.println(o);
      }
    }

    System.out.println((s1.size() - c) + "\t" + c + "\t" + (s2.size() - c));
  }

  public static void printVenn(Set s1, Set s2, Set s3)
  {
    int c12 = 0;
    int c13 = 0;
    int c23 = 0;
    int c123 = 0;

    for (Object o : s1)
    {
      if (s2.contains(o))
      {
        if (s3.contains(o))
        {
          c123++;
        }
        else
        {
          c12++;
        }
      }
      else if (s3.contains(o))
      {
        c13++;
      }
    }

    for (Object o : s2)
    {
      if (s3.contains(o))
      {
        c23++;
      }
    }

    System.out.println("\t" + (s1.size() - c12 - c13 - c123));
    System.out.println(c12 + "\t" + c123 + "\t" + c13);
    System.out.println((s2.size() - c12 - c23 - c123) + "\t" + c23 + "\t" +
      (s3.size() - c13 - c23 - c123));
  }

  public static int countCommon(Collection s1, Collection s2)
  {
    int c = 0 ;
    for (Object o : s1)
    {
      if (s2.contains(o)) c++;
    }
    return c;
  }

  public static <T> Set<T> getCommon(Set<T> set1, Set<T> set2)
  {
    Set<T> com = new HashSet<T>(set1);
    com.retainAll(set2);
    return com;
  }

  public static <T> Set<T> getDiff(Collection<T> set1, Collection<T> set2)
  {
    Set<T> diff = new HashSet<T>(set1);
    diff.addAll(set1);
    diff.removeAll(set2);
    return diff;
  }

  public static void removeCommon(Set set1, Set set2)
  {
    Set common = getCommon(set1, set2);
    set1.removeAll(common);
    set2.removeAll(common);
  }

  /**
   * Returns the ratio of overlap over size of set1.
   * @param set1
   * @param set2
   * @return
   */
  public static double overlap(Set set1, Set set2)
  {
    return getCommon(set1, set2).size() / (double) set1.size();
  }
 
  public static void main(String[] args) throws Throwable
  {
    printColumnOverlap("temp");
  }

//  public static void solveQuadratic()
//  {
//    int c1 = 62;
//    int c2 = 73;
//    int total = 405;
//    int common = 22;
//
//    double b = total - c1 - c2;
//    double c = (c1 * c2) - (total * common);
//
//    double r1 = (-b + Math.sqrt((b * b) - (4 * c))) / 2;
//    double r2 = (-b - Math.sqrt((b * b) - (4 * c))) / 2;
//
//    System.out.println("r1 = " + r1);
//    System.out.println("r2 = " + r2);
//  }

  public static void solveQuadratic()
  {
    int c1 = 83;
    int c2 = 100;
    int total = 405;
    int common = 39;

    double r = ((total * common) - (c1 * c2)) / (total + common - c1 - c2);
    System.out.println("r = " + r);
  }

  public static void test2() throws IOException
  {
    String a1 = "";
    String a2 = "";
    Set<String> s1 = new HashSet<String>(Arrays.asList(a1.split("\t")));
    Set<String> s2 = new HashSet<String>(Arrays.asList(a2.split("\t")));
    printVenn(s1, s2);
//    s1.retainAll(s2);
//    for (String s : s1)
//    {
//      System.out.print("\t" + s);
//    }
  }

  /**
   * The file contains names in columns. This method prints the overlap matrix among columns.
   * @throws IOException
   */
  public static void printColumnOverlap(String file) throws IOException
  {
    int size = FileUtil.getFirstLine(file).split("\t").length;
   
    Set<String>[] set = new Set[size];
   
    for (int i = 0; i < size; i++)
    {
      TabDelimitedFileParser p = new TabDelimitedFileParser(file);
      set[i] = p.getColumnSet(i);
    }

    for (String s : getCommon(set[0], set[1]))
    {
      System.out.println(s);
    }

    for (int i = 0; i < size; i++)
    {
      System.out.print("\t" + i + "(" + set[i].size() + ")");
    }

    DecimalFormat f = new DecimalFormat("0.00");
    for (int i = 0; i < size; i++)
    {
      System.out.print("\n" + i);
      for (int j = 0; j < size; j++)
      {
        if (i == j) System.out.print("\t    ");
        else System.out.print("\t" + f.format(overlap(set[i], set[j])));
      }
    }
    Set<String> all = new HashSet<String>();
    for (Set<String> s : set)
    {
      all.addAll(s);
    }
   
    Histogram h = new Histogram(1);

    for (String token : all)
    {
      h.count(count(token, set));
    }
    System.out.println("\n\nOcc\tCnt");
    h.print();
  }
 
  private static int count(String token, Set<String>[] set)
  {
    int i = 0;
    for (Set<String> s : set)
    {
      if (s.contains(token)) i++;
    }
    return i;
  }
}
TOP

Related Classes of gem.util.SetUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.