Package org.languagetool

Examples of org.languagetool.AnalyzedToken


  private AnalyzedTokenReadings[] getAnalyzedTokenReadings(final String input) throws IOException {
    return languageTool.getAnalyzedSentence(input).getTokensWithoutWhitespace();
  }

  private AnalyzedTokenReadings getAnalyzedTokenReadings(String token, String posTag, String lemma) {
    return new AnalyzedTokenReadings(new AnalyzedToken(token, posTag, lemma), 0);
  }
View Full Code Here


    final Element elUpper = new Element("\\p{Lu}\\p{Ll}+", true, true, false);
    final Element elAllUpper = new Element("\\p{Lu}+$", true, true, false);
    uni.setEquivalence("case-sensitivity", "lowercase", elLower);
    uni.setEquivalence("case-sensitivity", "uppercase", elUpper);
    uni.setEquivalence("case-sensitivity", "alluppercase", elAllUpper);
    final AnalyzedToken lower1 = new AnalyzedToken("lower", "JJR", "lower");
    final AnalyzedToken lower2 = new AnalyzedToken("lowercase", "JJ", "lowercase");
    final AnalyzedToken upper1 = new AnalyzedToken("Uppercase", "JJ", "Uppercase");
    final AnalyzedToken upper2 = new AnalyzedToken("John", "NNP", "John");
    final AnalyzedToken upperAll1 = new AnalyzedToken("JOHN", "NNP", "John");
    final AnalyzedToken upperAll2 = new AnalyzedToken("JAMES", "NNP", "James");

    final Map<String, List<String>> equiv = new HashMap<>();
    final List<String> list1 = new ArrayList<>();
    list1.add("lowercase");
    equiv.put("case-sensitivity", list1);
View Full Code Here

    uni.setEquivalence("number", "singular", sgElement);
    final Element plElement = new Element("", false, false, false);
    plElement.setPosElement(".*[\\.:]pl:.*", true, false);
    uni.setEquivalence("number", "plural", plElement);

    final AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah", "mały");
    final AnalyzedToken sing2 = new AnalyzedToken("człowiek", "subst:sg:blahblah", "człowiek");

    final Map<String, List<String>> equiv = new HashMap<>();
    final List<String> list1 = new ArrayList<>();
    list1.add("singular");
    equiv.put("number", list1);

    boolean satisfied = uni.isSatisfied(sing1, equiv);
    uni.startUnify();
    satisfied &= uni.isSatisfied(sing2, equiv);
    assertEquals(true, satisfied);
    uni.reset();

    //for multiple readings - OR for interpretations, AND for tokens
    AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
    satisfied = uni.isSatisfied(sing1, equiv);
    satisfied |= uni.isSatisfied(sing1a, equiv);
    uni.startUnify();
    satisfied &= uni.isSatisfied(sing2, equiv);
    assertEquals(true, satisfied);
    uni.reset();

    //check if any of the equivalences is there
    list1.add("plural");
    equiv.clear();
    equiv.put("number", list1);
    sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
    satisfied = uni.isSatisfied(sing1, equiv);
    satisfied |= uni.isSatisfied(sing1a, equiv);
    uni.startUnify();
    satisfied &= uni.isSatisfied(sing2, equiv);
    assertEquals(true, satisfied);
    uni.reset();

    //now test all possible feature equivalences by leaving type blank
    sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
    equiv.clear();
    equiv.put("number", null);
    satisfied = uni.isSatisfied(sing1, equiv);
    satisfied |= uni.isSatisfied(sing1a, equiv);
    uni.startUnify();
View Full Code Here

    final Element mascElement = new Element("", false, false, false);
    mascElement.setPosElement(".*[\\.:]m", true, false);
    uni.setEquivalence("gender", "masculine", mascElement);

    final AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
    final AnalyzedToken sing1a = new AnalyzedToken("mała", "adj:sg:blahblah:f", "mały");
    final AnalyzedToken sing1b = new AnalyzedToken("małe", "adj:pl:blahblah:m", "mały");
    final AnalyzedToken sing2 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");

    final Map<String, List<String>> equiv = new HashMap<>();
    equiv.put("number", null);
    equiv.put("gender", null);

View Full Code Here

    uni.setEquivalence("gender", "masculine", mascElement);
    final Element neutElement = new Element("", false, false, false);
    neutElement.setPosElement(".*[\\.:]n([\\.:].*)?", true, false);
    uni.setEquivalence("gender", "neutral", neutElement);

    final AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
    AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
    AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
    AnalyzedToken sing2 = new AnalyzedToken("zgarbiony", "adj:pl:blahblah:f", "zgarbiony");   
    final AnalyzedToken sing3 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");
   
    final Map<String, List<String>> equiv = new HashMap<>();
    equiv.put("number", null);
    equiv.put("gender", null);

    boolean satisfied = uni.isSatisfied(sing1, equiv);
    satisfied |= uni.isSatisfied(sing1a, equiv);
    satisfied |= uni.isSatisfied(sing1b, equiv);
    uni.startUnify();
    satisfied &= uni.isSatisfied(sing2, equiv);
    uni.startNextToken();
    satisfied &= uni.isSatisfied(sing3, equiv);
    uni.startNextToken();
    assertEquals(false, satisfied);
    uni.reset();

    //now test the simplified interface   
    uni.isUnified(sing1, equiv, false);
    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    uni.isUnified(sing2, equiv, true);   
    assertEquals(false, uni.isUnified(sing3, equiv, true));
    uni.reset();

    sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    sing2 = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
   
    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);   
    assertEquals(true, uni.isUnified(sing2, equiv, true));
    assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], godło[godło/subst:sg:nom.acc.voc:n*]]", Arrays.toString(uni.getFinalUnified()));
    uni.reset();

    //now test a case when the last reading doesn't match at all

    sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    final AnalyzedToken sing2a = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
    final AnalyzedToken sing2b = new AnalyzedToken("godło", "indecl", "godło");
   
    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    uni.isUnified(sing2a, equiv, false);
    assertEquals(true, uni.isUnified(sing2b, equiv, true));
    assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], godło[godło/subst:sg:nom.acc.voc:n*]]", Arrays.toString(uni.getFinalUnified()));
    uni.reset();
   
    //check if two features are left out correctly (both match)
    AnalyzedToken plur1 = new AnalyzedToken("zgarbieni", "adj:pl:foobar:m", "zgarbiony");
    AnalyzedToken plur2 = new AnalyzedToken("zgarbieni", "adj:pl:blabla:m", "zgarbiony");
   
    AnalyzedToken plur3 = new AnalyzedToken("ludzie", "subst:pl:blabla:m", "człowiek");
    AnalyzedToken plur4 = new AnalyzedToken("ludzie", "subst:pl:pampam:m", "człowiek");

    uni.isUnified(plur1, equiv, false);
    uni.isUnified(plur2, equiv, true);
    uni.isUnified(plur3, equiv, false);
    assertTrue(uni.isUnified(plur4, equiv, true));
View Full Code Here

      final Element mascElement = new Element("", false, false, false);
      mascElement.setPosElement(".*:m", true, false);
      uni.setEquivalence("gender", "masculine", mascElement);

      //Latin adjectives
      final AnalyzedToken sing_masc = new AnalyzedToken("parvus", "adj:sg:blahblah:m", "parvus");
      final AnalyzedToken plur_masc = new AnalyzedToken("parvi", "adj:sg:blahblah:m", "parvus");
      final AnalyzedToken plur_fem = new AnalyzedToken("parvae", "adj:pl:blahblah:f", "parvus");
      final AnalyzedToken sing_fem = new AnalyzedToken("parva", "adj:sg:blahblah:f", "parvus");
     
      //Let's pretend Latin has determiners
      final AnalyzedToken det_sing_fem = new AnalyzedToken("una", "det:sg:blahblah:f", "unus");
      final AnalyzedToken det_plur_fem = new AnalyzedToken("unae", "det:pl:blahblah:f", "unus");
      final AnalyzedToken det_sing_masc = new AnalyzedToken("unus", "det:sg:blahblah:m", "unus");
      final AnalyzedToken det_plur_masc = new AnalyzedToken("uni", "det:sg:blahblah:m", "unus");
     
      //and nouns
      final AnalyzedToken subst_sing_fem = new AnalyzedToken("discrepatio", "subst:sg:blahblah:f", "discrepatio");
      final AnalyzedToken subst_plur_fem = new AnalyzedToken("discrepationes", "subst:sg:blahblah:f", "discrepatio");
      final AnalyzedToken subst_sing_masc = new AnalyzedToken("homo", "sg:sg:blahblah:m", "homo");
      final AnalyzedToken subst_plur_masc = new AnalyzedToken("homines", "sg:sg:blahblah:m", "homo");
     
      //now we should have 4x4x4 combinations...
     
      final Map<String, List<String>> equiv = new HashMap<>();
      equiv.put("number", null);
View Full Code Here

        final String[] parts = line.split(";");
        if (parts.length != 3) {
          throw new IOException("Format error in file " + fileName + ", line: " + line + ", " +
                  "expected 3 semicolon-separated parts, got " + parts.length);
        }
        final AnalyzedToken analyzedToken = new AnalyzedToken(parts[1], parts[2], null);
        map.put(parts[0], new AnalyzedTokenReadings(analyzedToken, 0));
      }
    } finally {
      scanner.close();
    }
View Full Code Here

 
  @Override
  public List<AnalyzedToken> additionalTags(String word) {
    if ( NUMBER.matcher(word).matches() ){
      List<AnalyzedToken> additionalTaggedTokens  = new ArrayList<>();
      additionalTaggedTokens.add(new AnalyzedToken(word, IPOSTag.numr.toString(), word));
        return additionalTaggedTokens;
    }
    return null;
  }
View Full Code Here

    element4.setPosElement(Element.UNKNOWN_TAG+"|VBG", true, true);
   
    final Element element5 = new Element("\\p{Ll}+", false, true, false);
    element5.setPosElement(Element.UNKNOWN_TAG, false, false);       
   
    final AnalyzedToken an = new AnalyzedToken("schword", null, null);
    assertTrue(element.isMatched(an));
    assertFalse(element2.isMatched(an));
    assertTrue(element3.isMatched(an));
    assertFalse(element4.isMatched(an));
    assertTrue(element5.isMatched(an));
   
    // if the AnalyzedToken is in the set of readings that have
    //non-null tags...
    an.setNoPOSTag(false);
    assertFalse(element.isMatched(an));
    assertTrue(element2.isMatched(an));
    assertFalse(element3.isMatched(an));
    assertTrue(element4.isMatched(an));
    assertFalse(element5.isMatched(an));
   
    final AnalyzedToken anSentEnd = new AnalyzedToken("schword", JLanguageTool.SENTENCE_END_TAGNAME, null);
    assertTrue(element.isMatched(anSentEnd));
    assertFalse(element2.isMatched(anSentEnd));
    assertTrue(element3.isMatched(anSentEnd));
    assertFalse(element4.isMatched(anSentEnd));
    assertTrue(element5.isMatched(anSentEnd));
   
    final Element element6 = new Element("\\p{Ll}+", false, true, false);
    element6.setPosElement(JLanguageTool.SENTENCE_END_TAGNAME, false, false);
    assertTrue(element6.isMatched(anSentEnd));
   
    final Element element7 = new Element("\\p{Ll}+", false, true, false);
    element7.setPosElement(JLanguageTool.SENTENCE_END_TAGNAME+"|BLABLA", true, false);
    assertTrue(element7.isMatched(anSentEnd));
   
    // if the AnalyzedToken is in the set of readings that have
    //non-null tags...
    anSentEnd.setNoPOSTag(false);
    assertFalse(element.isMatched(anSentEnd));
    assertTrue(element2.isMatched(anSentEnd));
    assertFalse(element3.isMatched(anSentEnd));
    assertTrue(element4.isMatched(anSentEnd));
    assertFalse(element5.isMatched(anSentEnd));
   
    final AnalyzedToken anParaEnd = new AnalyzedToken("schword", JLanguageTool.PARAGRAPH_END_TAGNAME, null);
    assertTrue(element.isMatched(anParaEnd));
    assertFalse(element2.isMatched(anParaEnd));
    assertTrue(element3.isMatched(anParaEnd));
    assertFalse(element4.isMatched(anParaEnd));
    assertTrue(element5.isMatched(anParaEnd));
   
    // if the AnalyzedToken is in the set of readings that have
    //non-null tags...
    anParaEnd.setNoPOSTag(false);
    assertFalse(element.isMatched(anParaEnd));
    assertTrue(element2.isMatched(anParaEnd));
    assertFalse(element3.isMatched(anParaEnd));
    assertTrue(element4.isMatched(anParaEnd));
    assertFalse(element5.isMatched(anParaEnd));
   
    final AnalyzedToken anWithPOS = new AnalyzedToken("schword", "POS", null);
    assertFalse(element.isMatched(anWithPOS));
    assertTrue(element2.isMatched(anWithPOS));
    assertFalse(element3.isMatched(anWithPOS));
    assertTrue(element4.isMatched(anWithPOS));
    assertFalse(element5.isMatched(anWithPOS));
View Full Code Here

import java.util.Arrays;

public class MatchTest extends TestCase {

  private AnalyzedTokenReadings getAnalyzedTokenReadings(String token, String posTag, String lemma) {
    return new AnalyzedTokenReadings(new AnalyzedToken(token, posTag, lemma), 0);
  }
View Full Code Here

TOP

Related Classes of org.languagetool.AnalyzedToken

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.