Package eu.hlavki.text.lemmagen.api

Examples of eu.hlavki.text.lemmagen.api.Lemmatizer


    }

    @Test
    public void testOneWord() {
        try {
            Lemmatizer lm = LemmatizerFactory.getPrebuilt("mlteast-mk");
            assertEquals("инвестиција", lm.lemmatize("инвестиции"));
        } catch (IOException e) {
            fail(e.getMessage());
        }
    }
View Full Code Here


    }

    @Test
    public void longEnglishText() {
        try {
            Lemmatizer lm = LemmatizerFactory.getPrebuilt("mlteast-en");
            String text = "On the other hand, inflectional paradigms, "
                    + "or lists of inflected forms of typical words (such as sing, sang, "
                    + "sung, sings, singing, singer, singers, song, songs, songstress, "
                    + "songstresses in English) need to be analyzed according to criteria "
                    + "for uncovering the underlying lexical stem.";
            String[] words = text.split("(?=[,.])|\\s+");
            for (String word : words) {
                if (word.trim().length() > 1) {
                    CharSequence lemma = lm.lemmatize(word.trim());
                    if (!word.equals(lemma)) {
                        System.out.println(word + " -> " + lemma);
                    }
                }
            }
View Full Code Here

    }

    @Test
    public void shortEnglishText() {
        try {
            Lemmatizer lm = LemmatizerFactory.getPrebuilt("mlteast-en");
            assertEquals("be", lm.lemmatize("are"));
            assertEquals("sing", lm.lemmatize("singing"));
        } catch (IOException e) {
            fail(e.getMessage());
        }
    }
View Full Code Here

    }

    @Test
    public void testOneWord() {
        try {
            Lemmatizer lm = LemmatizerFactory.getPrebuilt("mlteast-fa");
            assertEquals("دوست", lm.lemmatize("دوستان"));
        } catch (IOException e) {
            fail(e.getMessage());
        }
    }
View Full Code Here

    public static Lemmatizer getPrebuilt(String name) throws IOException {
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        String resource = MessageFormat.format(PREBUILD_PATTERN, name);
        InputStream in = cl.getResourceAsStream(resource);
        Lemmatizer result = null;
        if (in != null) {
            result = read(in);
        } else {
            throw new IOException("Cannot found resource " + resource);
        }
View Full Code Here

            }
        }
    }

    public static Lemmatizer read(InputStream in) throws IOException {
        Lemmatizer retVal = null;
        ObjectInputStream ois = null;
        try {
            GZIPInputStream zis = new GZIPInputStream(in);
            ois = new ObjectInputStream(zis);
            retVal = new DefaultLemmatizer(ois);
View Full Code Here

TOP

Related Classes of eu.hlavki.text.lemmagen.api.Lemmatizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.