Package com.googlecode.gaal.suffix.impl

Source Code of com.googlecode.gaal.suffix.impl.EmbeddedSuffixTreeImplTest

package com.googlecode.gaal.suffix.impl;

import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;

import org.junit.Before;
import org.junit.Test;

import com.googlecode.gaal.analysis.impl.ProperIntervalSetBuilder;
import com.googlecode.gaal.data.api.Corpus;
import com.googlecode.gaal.data.api.IntSequence;
import com.googlecode.gaal.data.api.IntervalSet;
import com.googlecode.gaal.data.api.Multiset;
import com.googlecode.gaal.data.impl.CorpusTest;
import com.googlecode.gaal.suffix.api.EmbeddedSuffixTree;
import com.googlecode.gaal.suffix.api.EmbeddedSuffixTree.EmbeddedInterval;
import com.googlecode.gaal.suffix.api.IntervalTree.Interval;
import com.googlecode.gaal.suffix.api.LinearizedSuffixTree;
import com.googlecode.gaal.suffix.api.LinearizedSuffixTree.BinaryInterval;

public class EmbeddedSuffixTreeImplTest {

    private final Corpus<String> corpus = CorpusTest.createMississippiCorpus();
    private final IntSequence sequence = corpus.sequence();
    private LinearizedSuffixTree lst;

    @Before
    public void setUp() throws Exception {
        lst = new LinearizedSuffixTreeImpl(corpus.sequence(), corpus.alphabetSize());
    }

    @Test
    public void testCreate() {
        printSuffixes();
        int windowSize = 9;
        IntervalSet<BinaryInterval> properIntervalSet = new ProperIntervalSetBuilder().buildIntervalSet(lst);
        for (BinaryInterval interval : properIntervalSet) {
            System.out.printf("===interval: '%s'===\n", corpus.toString(interval.label(), ""));
            printIntervalSuffixes(interval, windowSize);
            EmbeddedSuffixTree est = EmbeddedSuffixTreeImpl.create(lst, interval, windowSize, corpus);
            IntervalSet<EmbeddedInterval> embProperIntervalSet = new ProperIntervalSetBuilder().buildIntervalSet(est);
            for (EmbeddedInterval embeddedInterval : embProperIntervalSet) {
                Multiset<IntSequence> fillerSet = embeddedInterval.fillerSet();
                System.out.printf("\tembedded interval %s(%d) fillers: %s\n",
                        corpus.toString(embeddedInterval.label(), ""), embeddedInterval.size(),
                        corpus.toString(fillerSet, ""));
            }
        }
    }

    private void printSuffixes() {
        System.out.println("Suffix Table:");
        for (int i : lst.getSuffixTable()) {
            IntSequence suffix = sequence.subSequence(i, sequence.size());
            System.out.printf("%2d| %s\n", i, corpus.toString(suffix, ""));
        }
    }

    private void printIntervalSuffixes(Interval interval, int windowSize) {
        IntSequence indices = interval.indices();
        int lcp = interval.lcp();
        int[] suffixTable = lst.getSuffixTable();
        int[] inverseSuffixTable = lst.getInverseSuffixTable();
        SortedMap<Integer, Integer> embeddedSuffixTableIndices = new TreeMap<Integer, Integer>();
        for (int i = 0; i < interval.size(); i++) {
            int start = indices.get(i) + lcp;
            for (int j = start; j < start + windowSize && j < sequence.size(); j++) {
                IntSequence suffix = sequence.subSequence(j, sequence.size());
                Integer startIndex = embeddedSuffixTableIndices.get(inverseSuffixTable[j]);
                if (startIndex == null || startIndex < start) {
                    embeddedSuffixTableIndices.put(inverseSuffixTable[j], start);
                    System.out.printf("%2d:%2d| %s+\n", start, j, corpus.toString(suffix, ""));
                } else {
                    System.out.printf("%2d:%2d| %s-\n", start, j, corpus.toString(suffix, ""));
                }
            }
        }
        System.out.println("Chosen Suffixes:");
        for (Entry<Integer, Integer> entry : embeddedSuffixTableIndices.entrySet()) {
            IntSequence suffix = sequence.subSequence(suffixTable[entry.getKey()], sequence.size());
            IntSequence filler = sequence.subSequence(entry.getValue(), suffixTable[entry.getKey()]);
            System.out.printf("%2d:%2d| %s {%s}\n", entry.getValue(), suffixTable[entry.getKey()],
                    corpus.toString(suffix, ""), corpus.toString(filler, ""));
        }
    }
}
TOP

Related Classes of com.googlecode.gaal.suffix.impl.EmbeddedSuffixTreeImplTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.