Examples of rejectHapaxes()


Examples of ch.akuhn.hapax.index.TermDocumentMatrix.rejectHapaxes()


    public TermDocumentMatrix makeTDM() {
        TermDocumentMatrix tdm = corpus;
        if (ignoreCase) tdm = tdm.toLowerCase();
        if (rejectRareTerms) tdm = tdm.rejectHapaxes();
        if (rejectStopwords) tdm = tdm.rejectStopwords();
        // TODO if (rejectCommonTerms) tdm = tdm.rejectCommonTerms();
        return tdm.weight(local, global);
    }
View Full Code Here

Examples of ch.akuhn.hapax.index.TermDocumentMatrix.rejectHapaxes()

    public TermDocumentMatrix rejectStopWords(final TermDocumentMatrix matrix) {
        TermDocumentMatrix tdm = matrix;
        tdm = tdm.toLowerCase();
        assertEquals(9, tdm.documentCount());
        assertEquals(42, tdm.termCount());
        tdm = tdm.rejectHapaxes();
        assertEquals(9, tdm.documentCount());
        assertEquals(16, tdm.termCount());
        tdm = tdm.toLowerCase().rejectStopwords();
        assertEquals(9, tdm.documentCount());
        assertEquals(12, tdm.termCount());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.