Package uk.ac.cam.ch.wwmm.ptc.experimental.termsimilarity

Examples of uk.ac.cam.ch.wwmm.ptc.experimental.termsimilarity.QTClusterer


      for(int j=i+1;j<dfl.size();j++) {
        sm.setSimilarity(i, j, (float)wordSimilarity(dfl.get(i), dfl.get(j)));
      }
    }
    // Progress message printed once the similarity matrix `sm` is ready.
    System.out.println("Similarity matrix made");
    // Build a clusterer over `sm` and form clusters.
    // NOTE(review): 0.5 is presumably a similarity threshold -- confirm
    // against QTClusterer.makeClusters.
    QTClusterer qt = new QTClusterer(sm);
    qt.makeClusters(0.5);
    // Print every resulting cluster (a set of names), one cluster per line.
    for(Set<String> terms : qt.getClustersOfNames()) {
      System.out.println(terms);
    }
   
  }
View Full Code Here


    // Report the ratio goodCosines / totalCosines; multiplying by 1.0 first
    // makes the division floating-point rather than integer.
    System.out.println("Fraction of pairwise similarities: " + goodCosines * 1.0 / totalCosines);
   
    // Elapsed milliseconds since `time` was last set (set outside this
    // excerpt), then restart the timer for the clustering step.
    System.out.println("Cosines calculated in: " + (System.currentTimeMillis() - time));
    time = System.currentTimeMillis();

    // Cluster over the similarity matrix `sm`.
    // NOTE(review): 0.05 is presumably a similarity threshold -- confirm
    // against QTClusterer.makeClusters.
    QTClusterer qt = new QTClusterer(sm);
    qt.makeClusters(0.05);
    System.out.println("Made clusters in: " + (System.currentTimeMillis() - time));
    // For each cluster (a set of integer indices into docFiles): list its
    // documents, run the cluster analysis, then print a blank separator line.
    for(Set<Integer> cluster : qt.getClustersBySize()) {
      // NOTE(review): clusterFiles and clusterMap are filled below but never
      // read before they go out of scope at the end of this loop body.
      List<File> clusterFiles = new ArrayList<File>();
      Map<Integer,Float> clusterMap = new HashMap<Integer,Float>();
       for(Integer i : cluster) {
         // Every member gets the same value, 1.0f.
         clusterMap.put(i, 1.0f);
        System.out.println(docFiles.get(i));
        clusterFiles.add(new File(docFiles.get(i)));
      }
      // The analyser receives a fresh list copy of the cluster's indices.
      ClusterAnalyser.analyseCluster(new ArrayList<Integer>(cluster), ir, new TanimotoSimilarity(), 0.05);
      System.out.println();
    }
   
    // Finally list the documents that the clusterer reports as unclustered.
    for(Integer i : qt.getUnclustered()) {
      System.out.println(docFiles.get(i));
    }

    // Release `ir` (opened outside this excerpt).
    ir.close();
  }
View Full Code Here

      }
      //System.out.println(Math.min(svals.length, docMatrix.getRowDimension()));
      //VirtualSimilarityMatrix simMatrix = new VirtualSimilarityMatrix(docMatrix, docFiles, Math.min(svals.length, docMatrix.getRowDimension()-1));
      // Elapsed milliseconds since `time` was last set (set outside this
      // excerpt), then restart the timer for the clustering step.
      System.out.println("Similarity matrix: " + (System.currentTimeMillis() - time));
      time = System.currentTimeMillis();     
      // Cluster over `simMatrix` (declared outside this excerpt).
      // NOTE(review): 0.05 is presumably a similarity threshold -- confirm
      // against QTClusterer.makeClusters.
      QTClusterer qt = new QTClusterer(simMatrix);
      qt.makeClusters(0.05);
      System.out.println("Made clusters in: " + (System.currentTimeMillis() - time));
      // For each cluster (a set of integer indices into docFiles): list its
      // documents, analyse it, then print a blank separator line.
      for(Set<Integer> cluster : qt.getClustersBySize()) {
        // Disabled code: a fuzzy-membership computation, kept for reference only.
        /*double[] fuzzyVector = vectorToPoints(combinedVector(cluster, docMatrix, svals), docMatrix, svals);
        double fvt = 0.0;
        Map<Integer,Float> fc = new HashMap<Integer,Float>();
        for(int i=0;i<fuzzyVector.length;i++) {
          fvt += fuzzyVector[i];
          //if(cluster.contains(i)) System.out.println(docFiles.get(i) + "\t" + fuzzyVector[i]);
          //if(fuzzyVector[i] > 0.1 && !cluster.contains(i)) System.out.println(docFiles.get(i) + "\t" + fuzzyVector[i]);
          //if(fuzzyVector[i] > 0.05 && !cluster.contains(i)) {
          //  cluster.add(i);
          //  assigned.add(i);
          //}
          if(fuzzyVector[i] > 0.25) fc.put(i, (float)fuzzyVector[i]);
        }*/
        //System.out.println(fvt);
        for(Integer i : cluster) {
          System.out.println(docFiles.get(i));
        }
        // Map every member index of the cluster to the same value, 1.0f;
        // this map is what gets passed to the analyser below.
        Map<Integer,Float> clusterMap = new HashMap<Integer,Float>();
        for(Integer i : cluster) {
          clusterMap.put(i, 1.0f);
        }

        //ClusterAnalyser.analyseCluster(fc, ir);
        //ClusterAnalyser.analyseCluster(new ArrayList<Integer>(cluster), ir, new TanimotoSimilarity(), 0.05);
        ClusterAnalyser.excessAnalyseCluster(clusterMap, ir, 0.2, true);
        System.out.println();
      }
     
      // List unclustered documents that are not in `assigned`.
      // NOTE(review): `assigned` is declared outside this excerpt, and the only
      // visible write to it is inside the commented-out block above -- confirm
      // it is populated elsewhere, otherwise this filter never excludes anything.
      for(Integer i : qt.getUnclustered()) {
        if(!assigned.contains(i)) System.out.println(docFiles.get(i));
      }
      return;
    }
   
View Full Code Here

TOP

Related Classes of uk.ac.cam.ch.wwmm.ptc.experimental.termsimilarity.QTClusterer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.