Package org.languagetool

Examples of org.languagetool.AnalyzedToken


    unifierConfig.setEquivalence("gender", "neutral",
        preparePOSElement(".*[\\.:]n([\\.:].*)?"));

    final Unifier uni = unifierConfig.createUnifier();

    final AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
    AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
    AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
    AnalyzedToken sing2 = new AnalyzedToken("zgarbiony", "adj:pl:blahblah:f", "zgarbiony");
    final AnalyzedToken sing3 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");

    final Map<String, List<String>> equiv = new HashMap<>();
    equiv.put("number", null);
    equiv.put("gender", null);

    boolean satisfied = uni.isSatisfied(sing1, equiv);
    satisfied |= uni.isSatisfied(sing1a, equiv);
    satisfied |= uni.isSatisfied(sing1b, equiv);
    uni.startUnify();
    satisfied &= uni.isSatisfied(sing2, equiv);
    uni.startNextToken();
    satisfied &= uni.isSatisfied(sing3, equiv);
    uni.startNextToken();
    satisfied &= uni.getFinalUnificationValue(equiv);
    assertEquals(false, satisfied);
    uni.reset();

    //now test the simplified interface
    uni.isUnified(sing1, equiv, false);
    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    uni.isUnified(sing2, equiv, true);
    assertEquals(false, uni.isUnified(sing3, equiv, true));
    uni.reset();

    sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    sing2 = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");

    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    assertEquals(true, uni.isUnified(sing2, equiv, true));
    assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], godło[godło/subst:sg:nom.acc.voc:n*]]", Arrays.toString(uni.getFinalUnified()));
    uni.reset();

    //now test a case when the last reading doesn't match at all

    sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    final AnalyzedToken sing2a = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
    final AnalyzedToken sing2b = new AnalyzedToken("godło", "indecl", "godło");

    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    uni.isUnified(sing2a, equiv, false);
    assertEquals(true, uni.isUnified(sing2b, equiv, true));
    assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], godło[godło/subst:sg:nom.acc.voc:n*]]", Arrays.toString(uni.getFinalUnified()));
    uni.reset();

    //check if two features are left out correctly (both match)
    AnalyzedToken plur1 = new AnalyzedToken("zgarbieni", "adj:pl:foobar:m", "zgarbiony");
    AnalyzedToken plur2 = new AnalyzedToken("zgarbieni", "adj:pl:blabla:m", "zgarbiony");

    AnalyzedToken plur3 = new AnalyzedToken("ludzie", "subst:pl:blabla:m", "człowiek");
    AnalyzedToken plur4 = new AnalyzedToken("ludzie", "subst:pl:pampam:m", "człowiek");

    uni.isUnified(plur1, equiv, false);
    uni.isUnified(plur2, equiv, true);
    uni.isUnified(plur3, equiv, false);
    assertTrue(uni.isUnified(plur4, equiv, true));
    assertEquals("[zgarbieni[zgarbiony/adj:pl:foobar:m*,zgarbiony/adj:pl:blabla:m*], " +
        "ludzie[człowiek/subst:pl:blabla:m*,człowiek/subst:pl:pampam:m*]]", Arrays.toString(uni.getFinalUnified()));

    //check with a sequence of many tokens

    uni.reset();

    AnalyzedToken case1a = new AnalyzedToken("xx", "abc:sg:f", "xx");
    AnalyzedToken case1b = new AnalyzedToken("xx", "cde:pl:f", "xx");

    AnalyzedToken case2a = new AnalyzedToken("yy", "abc:pl:f", "yy");
    AnalyzedToken case2b = new AnalyzedToken("yy", "cde:as:f", "yy");
    AnalyzedToken case2c = new AnalyzedToken("yy", "cde:pl:c", "yy");
    AnalyzedToken case2d = new AnalyzedToken("yy", "abc:sg:f", "yy");
    AnalyzedToken case2e = new AnalyzedToken("yy", "efg:aa:e", "yy");

    uni.isUnified(case1a, equiv, false);
    uni.isUnified(case1b, equiv, true);

    uni.isUnified(case2a, equiv, false);
    uni.isUnified(case2b, equiv, false);
    uni.isUnified(case2c, equiv, false);
    uni.isUnified(case2d, equiv, false);
    assertTrue(uni.isUnified(case2e, equiv, true));
    assertEquals("[xx[xx/abc:sg:f*,xx/cde:pl:f*], yy[yy/abc:pl:f*,yy/abc:sg:f*]]",
        Arrays.toString(uni.getFinalUnified()));

    uni.reset();

    AnalyzedToken tokenComplex1_1 = new AnalyzedToken("xx", "abc:sg:f", "xx1");
    AnalyzedToken tokenComplex1_2 = new AnalyzedToken("xx", "cde:pl:f", "xx2");

    AnalyzedToken tokenComplex2_1 = new AnalyzedToken("yy", "abc:sg:f", "yy1");
    AnalyzedToken tokenComplex2_2 = new AnalyzedToken("yy", "cde:pl:f", "yy2");

    AnalyzedToken tokenComplex3 = new AnalyzedToken("zz", "cde:sg:f", "zz");

    uni.isUnified(tokenComplex1_1, equiv, false);
    uni.isUnified(tokenComplex1_2, equiv, true);

    uni.isUnified(tokenComplex2_1, equiv, false);
View Full Code Here


    unifierConfig.setEquivalence("case", "vocativus",
        preparePOSElement(".*[\\.:]voc[\\.:]?.*"));

    final Unifier uni = unifierConfig.createUnifier();

    final AnalyzedToken sing1 = new AnalyzedToken("niezgorsze", "adj:sg:acc:n1.n2:pos", "niezgorszy");
    final AnalyzedToken sing1a = new AnalyzedToken("niezgorsze", "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos", "niezgorszy");
    final AnalyzedToken sing1b = new AnalyzedToken("niezgorsze", "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos", "niezgorszy");
    final AnalyzedToken sing1c = new AnalyzedToken("niezgorsze", "adj:sg:nom.voc:n1.n2:pos", "niezgorszy");
    final AnalyzedToken sing2 = new AnalyzedToken("lekarstwo", "subst:sg:acc:n2", "lekarstwo");
    final AnalyzedToken sing2b = new AnalyzedToken("lekarstwo", "subst:sg:nom:n2", "lekarstwo");
    final AnalyzedToken sing2c = new AnalyzedToken("lekarstwo", "subst:sg:voc:n2", "lekarstwo");

    final Map<String, List<String>> equiv = new HashMap<>();
    equiv.put("number", null);
    equiv.put("gender", null);
    equiv.put("case", null);
View Full Code Here

        preparePOSElement(".*:m"));

    final Unifier uni = unifierConfig.createUnifier();

    //Latin adjectives
    final AnalyzedToken sing_masc = new AnalyzedToken("parvus", "adj:sg:blahblah:m", "parvus");
    final AnalyzedToken plur_masc = new AnalyzedToken("parvi", "adj:sg:blahblah:m", "parvus");
    final AnalyzedToken plur_fem = new AnalyzedToken("parvae", "adj:pl:blahblah:f", "parvus");
    final AnalyzedToken sing_fem = new AnalyzedToken("parva", "adj:sg:blahblah:f", "parvus");

    //Let's pretend Latin has determiners
    final AnalyzedToken det_sing_fem = new AnalyzedToken("una", "det:sg:blahblah:f", "unus");
    final AnalyzedToken det_plur_fem = new AnalyzedToken("unae", "det:pl:blahblah:f", "unus");
    final AnalyzedToken det_sing_masc = new AnalyzedToken("unus", "det:sg:blahblah:m", "unus");
    final AnalyzedToken det_plur_masc = new AnalyzedToken("uni", "det:sg:blahblah:m", "unus");

    //and nouns
    final AnalyzedToken subst_sing_fem = new AnalyzedToken("discrepatio", "subst:sg:blahblah:f", "discrepatio");
    final AnalyzedToken subst_plur_fem = new AnalyzedToken("discrepationes", "subst:sg:blahblah:f", "discrepatio");
    final AnalyzedToken subst_sing_masc = new AnalyzedToken("homo", "sg:sg:blahblah:m", "homo");
    final AnalyzedToken subst_plur_masc = new AnalyzedToken("homines", "sg:sg:blahblah:m", "homo");

    //now we should have 4x4x4 combinations...

    final Map<String, List<String>> equiv = new HashMap<>();
    equiv.put("number", null);
View Full Code Here

    final Map<String, List<String>> equiv = new HashMap<>();
    equiv.put("number", null);
    equiv.put("gender", null);

    AnalyzedToken sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    AnalyzedToken sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    AnalyzedToken sing2 = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");

    AnalyzedToken comma = new AnalyzedToken(",", "comma", ",");

    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    uni.addNeutralElement(new AnalyzedTokenReadings(comma, 0));
    assertEquals(true, uni.isUnified(sing2, equiv, true));
View Full Code Here

    //with regular expressions
    assertEquals("[časopisy, časopisov, časopisom, časopisy, časopisy, časopisoch, časopismi, časopis, časopisu, časopisu, časopis, časopis, časopise, časopisom]", Arrays.toString(synth.synthesize(dummyToken("časopis"), "SS.*", true)));   
  }
 
  private AnalyzedToken dummyToken(String tokenStr) {
    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
  }
View Full Code Here

    element4.setPosElement(Element.UNKNOWN_TAG+"|VBG", true, true);
   
    final Element element5 = new Element("\\p{Ll}+", false, true, false);
    element5.setPosElement(Element.UNKNOWN_TAG, false, false);       
   
    final AnalyzedToken an = new AnalyzedToken("schword", null, null);
    assertTrue(element.isMatched(an));
    assertFalse(element2.isMatched(an));
    assertTrue(element3.isMatched(an));
    assertFalse(element4.isMatched(an));
    assertTrue(element5.isMatched(an));
   
    // if the AnalyzedToken is in the set of readings that have
    //non-null tags...
    an.setNoPOSTag(false);
    assertFalse(element.isMatched(an));
    assertTrue(element2.isMatched(an));
    assertFalse(element3.isMatched(an));
    assertTrue(element4.isMatched(an));
    assertFalse(element5.isMatched(an));
   
    final AnalyzedToken anSentEnd = new AnalyzedToken("schword", JLanguageTool.SENTENCE_END_TAGNAME, null);
    assertTrue(element.isMatched(anSentEnd));
    assertFalse(element2.isMatched(anSentEnd));
    assertTrue(element3.isMatched(anSentEnd));
    assertFalse(element4.isMatched(anSentEnd));
    assertTrue(element5.isMatched(anSentEnd));
   
    final Element element6 = new Element("\\p{Ll}+", false, true, false);
    element6.setPosElement(JLanguageTool.SENTENCE_END_TAGNAME, false, false);
    assertTrue(element6.isMatched(anSentEnd));
   
    final Element element7 = new Element("\\p{Ll}+", false, true, false);
    element7.setPosElement(JLanguageTool.SENTENCE_END_TAGNAME+"|BLABLA", true, false);
    assertTrue(element7.isMatched(anSentEnd));
   
    // if the AnalyzedToken is in the set of readings that have
    //non-null tags...
    anSentEnd.setNoPOSTag(false);
    assertFalse(element.isMatched(anSentEnd));
    assertTrue(element2.isMatched(anSentEnd));
    assertFalse(element3.isMatched(anSentEnd));
    assertTrue(element4.isMatched(anSentEnd));
    assertFalse(element5.isMatched(anSentEnd));
   
    final AnalyzedToken anParaEnd = new AnalyzedToken("schword", JLanguageTool.PARAGRAPH_END_TAGNAME, null);
    assertTrue(element.isMatched(anParaEnd));
    assertFalse(element2.isMatched(anParaEnd));
    assertTrue(element3.isMatched(anParaEnd));
    assertFalse(element4.isMatched(anParaEnd));
    assertTrue(element5.isMatched(anParaEnd));
   
    // if the AnalyzedToken is in the set of readings that have
    //non-null tags...
    anParaEnd.setNoPOSTag(false);
    assertFalse(element.isMatched(anParaEnd));
    assertTrue(element2.isMatched(anParaEnd));
    assertFalse(element3.isMatched(anParaEnd));
    assertTrue(element4.isMatched(anParaEnd));
    assertFalse(element5.isMatched(anParaEnd));
   
    final AnalyzedToken anWithPOS = new AnalyzedToken("schword", "POS", null);
    assertFalse(element.isMatched(anWithPOS));
    assertTrue(element2.isMatched(anWithPOS));
    assertFalse(element3.isMatched(anWithPOS));
    assertTrue(element4.isMatched(anWithPOS));
    assertFalse(element5.isMatched(anWithPOS));
View Full Code Here

    assertTrue(p.testAllReadings(tokenReadings("bar", "myChunk"), elemMatcher, null, 0, 0, 0));
    assertFalse(p.testAllReadings(tokenReadings("bar", "otherChunk"), elemMatcher, null, 0, 0, 0));
  }

  private AnalyzedTokenReadings[] tokenReadings(String token, String chunkTag) {
    AnalyzedTokenReadings tokenReadings1 = new AnalyzedTokenReadings(new AnalyzedToken(token, "pos", "lemma"), 0);
    if (chunkTag != null) {
      tokenReadings1.setChunkTags(Collections.singletonList(new ChunkTag(chunkTag)));
    }
    return new AnalyzedTokenReadings[] { tokenReadings1 };
  }
View Full Code Here

  private final RuleFilterEvaluator eval = new RuleFilterEvaluator(null);

  @Test
  public void testGetResolvedArguments() throws Exception {
    AnalyzedTokenReadings[] readingsList = {
            new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0),
            new AnalyzedTokenReadings(new AnalyzedToken("fake2", "pos", null), 0)
    };
    Map<String,String> map = eval.getResolvedArguments("year:\\1 month:\\2", readingsList, Arrays.asList(1, 1));
    assertThat(map.get("year"), is("fake1"));
    assertThat(map.get("month"), is("fake2"));
    assertThat(map.size(), is(2));
View Full Code Here

  }

  @Test(expected = RuntimeException.class)
  public void testDuplicateKey() throws Exception {
    AnalyzedTokenReadings[] readingsList = {
            new AnalyzedTokenReadings(new AnalyzedToken("fake1", "SENT_START", null), 0),
            new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0),
            new AnalyzedTokenReadings(new AnalyzedToken("fake2", "pos", null), 0)
    };
    eval.getResolvedArguments("year:\\1 year:\\2", readingsList, Arrays.asList(1, 2));
  }
View Full Code Here

import org.languagetool.AnalyzedToken;

public class RussianSynthesizerTest extends TestCase {
  private AnalyzedToken dummyToken(String tokenStr) {
    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
  }
View Full Code Here

TOP

Related Classes of org.languagetool.AnalyzedToken

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.