Package uk.ac.cam.ch.wwmm.ptc.experimental.termsimilarity

Examples of uk.ac.cam.ch.wwmm.ptc.experimental.termsimilarity.SimilarityMatrix


    List<String> dfl = StringTools.getSortedList(docFreqs);
    int maxSize = 20000;
    if(dfl.size() > maxSize) dfl = dfl.subList(0, maxSize);
    System.out.println("Have " + dfl.size() + " terms");
   
    SimilarityMatrix sm = new SimilarityMatrix(dfl);
    for(int i=0;i<dfl.size();i++) {
      for(int j=i+1;j<dfl.size();j++) {
        sm.setSimilarity(i, j, (float)wordSimilarity(dfl.get(i), dfl.get(j)));
      }
    }
    System.out.println("Similarity matrix made");
    QTClusterer qt = new QTClusterer(sm);
    qt.makeClusters(0.5);
View Full Code Here


      vectors.add(vector);
    }
    System.out.println("Read vectors in: " + (System.currentTimeMillis() - time));
    time = System.currentTimeMillis();

    SimilarityMatrix sm = new SimilarityMatrix(docFiles);
   
    int goodCosines = 0;
    int totalCosines = 0;
    for(int i=0;i<vectors.size();i++) {
      for(int j=i+1;j<vectors.size();j++) {
        double cosine = cosine(vectors.get(i), vectors.get(j));
        sm.setSimilarity(i, j, (float)cosine);
        if(cosine > 0.05) goodCosines++;
        totalCosines++;
      }
    }
    System.out.println("Fraction of pairwise similarities: " + goodCosines * 1.0 / totalCosines);
View Full Code Here

   
    time = System.currentTimeMillis();
    if(true) {   
      // Build a pairwise similarity matrix over docFiles from the transposed Vt matrix of svdh.
      // NOTE(review): `assigned` is not referenced anywhere in this visible excerpt.
      Set<Integer> assigned = new HashSet<Integer>();
      // Transpose of svdh.getVt(); presumably one row per document, since it is indexed below
      // with positions into docFiles — TODO confirm.
      Matrix docMatrix = svdh.getVt().transpose();
      SimilarityMatrix simMatrix = new SimilarityMatrix(docFiles);
      // Visit each unordered pair (i < j) once and store the cosine value narrowed to float.
      for(int i=0;i<docFiles.size();i++) {
        for(int j=i+1;j<docFiles.size();j++) {
          simMatrix.setSimilarity(i, j, (float)cosine(docMatrix, i, j, svals));
          // Disabled alternative: the same call using tanimoto(...) instead of cosine(...).
          //simMatrix.setSimilarity(i, j, (float)tanimoto(docMatrix, i, j, svals));
        }
      }
      // Disabled alternative: a VirtualSimilarityMatrix built from docMatrix in place of the loop above.
      //System.out.println(Math.min(svals.length, docMatrix.getRowDimension()));
      //VirtualSimilarityMatrix simMatrix = new VirtualSimilarityMatrix(docMatrix, docFiles, Math.min(svals.length, docMatrix.getRowDimension()-1));
View Full Code Here

TOP

Related Classes of uk.ac.cam.ch.wwmm.ptc.experimental.termsimilarity.SimilarityMatrix

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.