package gem.util;
import gem.Difference;
import gem.StageAnalyzer;
import gem.Triplet;
import gem.parser.TabDelimitedFileParser;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.*;
/**
 * Static helpers for comparing sets: intersections, differences, overlap ratios and
 * tab-separated Venn-style count printouts on standard output.
 *
 * @author Ozgun Babur
 */
public class SetUtils
{
    /**
     * Prints the Venn counts of two sets on a single tab-separated line, in the order:
     * elements only in s1, elements in both, elements only in s2.
     *
     * @param s1 first set
     * @param s2 second set
     */
    public static void printVenn(Set<?> s1, Set<?> s2)
    {
        int shared = countCommon(s1, s2);
        System.out.println((s1.size() - shared) + "\t" + shared + "\t" + (s2.size() - shared));
    }

    /**
     * Prints the seven region counts of a three-set Venn diagram as three tab-separated
     * lines:
     * <pre>
     * line 1: (empty cell), only s1
     * line 2: s1 and s2 only, all three, s1 and s3 only
     * line 3: only s2, s2 and s3 only, only s3
     * </pre>
     *
     * @param s1 first set
     * @param s2 second set
     * @param s3 third set
     */
    public static void printVenn(Set<?> s1, Set<?> s2, Set<?> s3)
    {
        int c12 = 0;
        int c13 = 0;
        int c23 = 0;
        int c123 = 0;

        for (Object o : s1)
        {
            boolean in2 = s2.contains(o);
            boolean in3 = s3.contains(o);

            if (in2 && in3) c123++;
            else if (in2) c12++;
            else if (in3) c13++;
        }

        for (Object o : s2)
        {
            // Elements that are also in s1 belong to the triple intersection and are
            // already counted in c123; counting them here again would inflate c23 and
            // make the "only s2" / "only s3" cells subtract them twice.
            if (s3.contains(o) && !s1.contains(o)) c23++;
        }

        System.out.println("\t" + (s1.size() - c12 - c13 - c123));
        System.out.println(c12 + "\t" + c123 + "\t" + c13);
        System.out.println((s2.size() - c12 - c23 - c123) + "\t" + c23 + "\t" +
            (s3.size() - c13 - c23 - c123));
    }

    /**
     * Counts the elements of s1 that are contained in s2. If s1 holds duplicates, each
     * occurrence is counted.
     *
     * @param s1 collection whose elements are tested
     * @param s2 collection queried with {@code contains}
     * @return number of elements of s1 found in s2
     */
    public static int countCommon(Collection<?> s1, Collection<?> s2)
    {
        int c = 0;
        for (Object o : s1)
        {
            if (s2.contains(o)) c++;
        }
        return c;
    }

    /**
     * Returns the intersection of two sets as a new {@link HashSet}; the arguments are
     * not modified.
     *
     * @param set1 first set
     * @param set2 second set
     * @return new set holding the elements present in both arguments
     */
    public static <T> Set<T> getCommon(Set<T> set1, Set<T> set2)
    {
        Set<T> com = new HashSet<T>(set1);
        com.retainAll(set2);
        return com;
    }

    /**
     * Returns the elements of set1 that are not in set2 as a new {@link HashSet}; the
     * arguments are not modified.
     *
     * @param set1 collection to take elements from
     * @param set2 collection of elements to exclude
     * @return new set holding set1 minus set2
     */
    public static <T> Set<T> getDiff(Collection<T> set1, Collection<T> set2)
    {
        // The copy constructor already adds every element of set1.
        Set<T> diff = new HashSet<T>(set1);
        diff.removeAll(set2);
        return diff;
    }

    /**
     * Removes from both sets, in place, every element they have in common.
     *
     * @param set1 first set, modified
     * @param set2 second set, modified
     */
    public static void removeCommon(Set<?> set1, Set<?> set2)
    {
        // Snapshot the intersection first so that shrinking set1 does not affect what is
        // removed from set2.
        Set<Object> common = intersection(set1, set2);
        set1.removeAll(common);
        set2.removeAll(common);
    }

    /**
     * Returns the ratio of overlap over size of set1.
     *
     * @param set1 set whose size is the denominator
     * @param set2 set to intersect with set1
     * @return size of the intersection divided by the size of set1; NaN when set1 is
     * empty, because the floating-point division is then 0 / 0
     */
    public static double overlap(Set<?> set1, Set<?> set2)
    {
        return intersection(set1, set2).size() / (double) set1.size();
    }

    /**
     * Wildcard-friendly intersection used by {@link #removeCommon} and {@link #overlap}:
     * copies a into a new {@link HashSet} and retains only the elements also in b.
     */
    private static Set<Object> intersection(Set<?> a, Set<?> b)
    {
        Set<Object> result = new HashSet<Object>(a);
        result.retainAll(b);
        return result;
    }

    /**
     * Runs {@link #printColumnOverlap(String)} on the file named "temp".
     */
    public static void main(String[] args) throws Throwable
    {
        printColumnOverlap("temp");
    }

    // Earlier variant, kept for reference: it solved r^2 + b*r + c = 0 with the
    // quadratic formula.
    //
    //    public static void solveQuadratic()
    //    {
    //        int c1 = 62;
    //        int c2 = 73;
    //        int total = 405;
    //        int common = 22;
    //
    //        double b = total - c1 - c2;
    //        double c = (c1 * c2) - (total * common);
    //
    //        double r1 = (-b + Math.sqrt((b * b) - (4 * c))) / 2;
    //        double r2 = (-b - Math.sqrt((b * b) - (4 * c))) / 2;
    //
    //        System.out.println("r1 = " + r1);
    //        System.out.println("r2 = " + r2);
    //    }

    /**
     * Prints the value r that satisfies (c1 - r) * (c2 - r) = (total - r) * (common - r)
     * for the hard-coded counts below. The squared terms cancel, so despite the method
     * name the solution is the closed form
     * r = (total * common - c1 * c2) / (total + common - c1 - c2).
     */
    public static void solveQuadratic()
    {
        int c1 = 83;
        int c2 = 100;
        int total = 405;
        int common = 39;

        // Divide in floating point: with two int operands the quotient would be
        // truncated to a whole number before being widened to double.
        double r = ((total * common) - (c1 * c2)) / (double) (total + common - c1 - c2);
        System.out.println("r = " + r);
    }

    /**
     * Scratch method: splits the two string literals a1 and a2 on tabs and prints the
     * two-set Venn counts of the resulting token sets. With the literals left empty,
     * each split yields one empty token, so both sets contain just the empty string.
     */
    public static void test2() throws IOException
    {
        String a1 = "";
        String a2 = "";
        Set<String> s1 = new HashSet<String>(Arrays.asList(a1.split("\t")));
        Set<String> s2 = new HashSet<String>(Arrays.asList(a2.split("\t")));
        printVenn(s1, s2);
    }

    /**
     * The file contains names in columns. This method prints the overlap matrix among
     * columns. The output is, in order:
     * <ol>
     * <li>the elements common to the first two columns, one per line (skipped when there
     * are fewer than two columns);</li>
     * <li>a header row with each column index and the size of its set;</li>
     * <li>one row per column i whose cell j is {@code overlap(set[i], set[j])} formatted
     * as "0.00", with a blank diagonal;</li>
     * <li>under the header "Occ\tCnt", a histogram of the number of columns each distinct
     * token occurs in.</li>
     * </ol>
     * The number of columns is taken from the tab-separated first line of the file.
     *
     * @param file name of the tab-delimited file to read
     * @throws IOException
     */
    public static void printColumnOverlap(String file) throws IOException
    {
        int size = FileUtil.getFirstLine(file).split("\t").length;

        // Java cannot create a generic array directly; the raw array is only ever filled
        // with the column sets returned by the parser below.
        @SuppressWarnings("unchecked")
        Set<String>[] set = new Set[size];

        for (int i = 0; i < size; i++)
        {
            // A fresh parser per column, as in the original code.
            // NOTE(review): whether one parser instance could serve every column is not
            // visible from here.
            TabDelimitedFileParser p = new TabDelimitedFileParser(file);
            set[i] = p.getColumnSet(i);
        }

        // set[0] and set[1] exist only when the file has at least two columns.
        if (size > 1)
        {
            for (String s : getCommon(set[0], set[1]))
            {
                System.out.println(s);
            }
        }

        for (int i = 0; i < size; i++)
        {
            System.out.print("\t" + i + "(" + set[i].size() + ")");
        }

        DecimalFormat f = new DecimalFormat("0.00");

        for (int i = 0; i < size; i++)
        {
            System.out.print("\n" + i);

            for (int j = 0; j < size; j++)
            {
                if (i == j) System.out.print("\t ");
                else System.out.print("\t" + f.format(overlap(set[i], set[j])));
            }
        }

        Set<String> all = new HashSet<String>();
        for (Set<String> s : set)
        {
            all.addAll(s);
        }

        // NOTE(review): the meaning of the constructor argument 1 is defined by
        // Histogram, which is not visible here.
        Histogram h = new Histogram(1);
        for (String token : all)
        {
            h.count(count(token, set));
        }

        System.out.println("\n\nOcc\tCnt");
        h.print();
    }

    /**
     * Counts how many of the given sets contain the token.
     */
    private static int count(String token, Set<String>[] set)
    {
        int i = 0;
        for (Set<String> s : set)
        {
            if (s.contains(token)) i++;
        }
        return i;
    }
}