Package uk.ac.ucl.panda.utility.structure

Examples of uk.ac.ucl.panda.utility.structure.TermFreqVector


    Iterator i = fieldToReader.entrySet().iterator();
    while (i.hasNext()) {
      Map.Entry e = (Map.Entry)i.next();
      String field = (String)e.getKey();
      IndexReader reader = (IndexReader)e.getValue();
      TermFreqVector vector = reader.getTermFreqVector(n, field);
      if (vector != null)
        results.add(vector);
    }
    return (TermFreqVector[])
      results.toArray(new TermFreqVector[results.size()]);
View Full Code Here


      df = reader.docFreq(term) * 1.0d;
      DocNum = reader.numDocs();
      idf = Math.log(DocNum / df) * log2toe;
      // calculating P(t|M) M - document collection - LM
      // tfc += f;
      TermFreqVector freqVector = reader.getTermFreqVector(doc, "body");
      populateLengthNormalization(freqVector);
      populateDocumentSpecificityScore(freqVector, DL, DocNum);

      double score = 0;
      // score = model.getscore(tf, df, idf, DL, aveDL, DocNum, CL,
View Full Code Here

  }

  // Pearson's product-moment coefficient
  private double correlation(int a, int b) throws IOException {
    double score = 0;
    TermFreqVector doc_a = reader.getTermFreqVector(a, docDataField);
    // /////////////
    // if( doc_a==null) System.out.println("doc_a is null");
    // /////////////
    TermFreqVector doc_b = reader.getTermFreqVector(b, docDataField);

    // store all the term frequencies in hash maps
    HashMap term_map = new HashMap();
    HashMap map_a = new HashMap();
    String terms[] = doc_a.getTerms();
    int freq[] = doc_a.getTermFrequencies();
    double ave_a = 0;
    for (int i = 0; i < terms.length; i++) {
      map_a.put(terms[i], freq[i]);
      term_map.put(terms[i], terms[i]);
      ave_a += freq[i];
    }
    ave_a = ave_a / terms.length;

    double ave_b = 0;
    HashMap map_b = new HashMap();
    terms = doc_b.getTerms();
    freq = doc_b.getTermFrequencies();
    for (int i = 0; i < terms.length; i++) {
      map_b.put(terms[i], freq[i]);
      term_map.put(terms[i], terms[i]);
      ave_b += freq[i];
    }
View Full Code Here

TOP

Related Classes of uk.ac.ucl.panda.utility.structure.TermFreqVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.