Package ch.akuhn.hapax.corpus

Examples of ch.akuhn.hapax.corpus.Terms


    return String.format("logL(%s) = %.3f", term, value());
  }

  public static Comparison compare(Terms t1, Terms t2) {
    Comparison comparison = new Comparison();
    for (String each: new Terms(t1, t2).elementSet()) {
      comparison.add(new LogLikelihood(t1, t2, each));
    }
    Collections.sort(comparison);
    return comparison;
  }
View Full Code Here


  public static class Examples {

    @Test
    public void shouldComputeLogLikelihood() {
      Terms all = new Terms("A A A A A B C C C D D");
      Terms doc = new Terms("A A B B X");
      for (String each: doc.elements()) {
        LogLikelihood loglr = new LogLikelihood(all, doc, each);
        System.out.println(loglr);
      }
    }
View Full Code Here

    //        return tdm;  
    //    }

    @Override
    public Terms terms() {
        Terms bag = new Terms();
        for (Pair<String,Vector> each: termRowPairs()) {
            bag.add(each.fst, (int) each.snd.sum());
        }
        return bag;
    }
View Full Code Here

    @Override
    public Terms getDocument(String doc) {
        int column = documents.get(doc);
        if (column == -1) throw new NoSuchElementException();
        Terms bag = new Terms();
        for (Pair<String,Vector> each: termRowPairs()) {
            int count = (int) each.snd.get(column);
            bag.add(each.fst, count);
        }
        return bag;
    }
View Full Code Here

        this.assertInvariant();
    }

    public double[] createPseudoDocument(String string) {
        // apply: CamelCaseScanner, PorterStemmer, toLowerCase, and weighting
        Terms query = new Terms(string).toLowerCase().stem();
        return createPseudoDocument(query);
    }
View Full Code Here

  public static CorpusBuilder newCorpus() {
    return new CorpusBuilder();
  }
 
  public Ranking<String> find(String content) {
    Terms query = new Terms();
    scanner.newInstance().client(query).onString(content).run();
    if (ignoreCase) query = query.toLowerCase();
        return latentIndex.rankDocumentsByQuery(query);
  }
View Full Code Here

    if (ignoreCase) query = query.toLowerCase();
        return latentIndex.rankDocumentsByQuery(query);
  }
 
  public synchronized void updateDocument(String doc, String contents) {
    Terms document = scanner.fromString(contents);
    if (ignoreCase) document = document.toLowerCase();
    latentIndex.updateDocument(doc, document);
  }
View Full Code Here

public class SmallDocumentsTest {

    @Test
    public void corpusWithSmallDocuments() {
        TermDocumentMatrix tdm = new TermDocumentMatrix();
        tdm.putDocument("m1", new Terms("Lorem ipsum dolor."));
        tdm.putDocument("m2", new Terms("Lorem ipsum dolor."));
        tdm.putDocument("m3", new Terms("Lorem ipsum dolor."));
        LatentSemanticIndex lsi = tdm.rejectAndWeight().createIndex();
        assertEquals(3, lsi.documentCount());
        assertEquals(3, lsi.rankDocumentsByQuery("Lorem").size());
    }
View Full Code Here

    }
   
    @Test
    public void corpusWithoutOneDocuments() {
        TermDocumentMatrix tdm = new TermDocumentMatrix();
        tdm.putDocument("m1", new Terms("Lorem ipsum dolor."));
        LatentSemanticIndex lsi = tdm.rejectAndWeight().createIndex();
        assertEquals(1, lsi.documentCount());
        assertEquals(1, lsi.rankDocumentsByQuery("Lorem").size());
    }
View Full Code Here

        Out.puts(list);
       
    }

    private static Terms union(Terms t1, Terms t2) {
        Terms terms = new Terms();
        terms.addAll(t1);
        terms.addAll(t2);
        return terms;
    }
View Full Code Here

TOP

Related Classes of ch.akuhn.hapax.corpus.Terms

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.