Examples of org.apache.lucene.analysis.compound.hyphenation.HyphenationTree

Package org.apache.lucene.analysis.compound.hyphenation

Examples of org.apache.lucene.analysis.compound.hyphenation.HyphenationTree

org.apache.lucene.analysis.compound.hyphenation.HyphenationTree
... (http://xmlgraphics.apache.org/fop/). They have been slightly modified.

   * @return An object representing the hyphenation patterns
   * @throws Exception
   */
  public static HyphenationTree getHyphenationTree(Reader hyphenationReader)
      throws Exception {
    HyphenationTree tree = new HyphenationTree();


    tree.loadPatterns(new InputSource(hyphenationReader));


    return tree;
  }

View Full Code Here

   * @return An object representing the hyphenation patterns
   * @throws Exception
   */
  public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
      throws Exception {
    HyphenationTree tree = new HyphenationTree();
    tree.loadPatterns(hyphenationSource);
    return tree;
  }

View Full Code Here

    if (reader == null) {
      // we gracefully die if we have no reader
      return;
    }


    HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
        .getHyphenationTree(reader);


    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
        new WhitespaceTokenizer(new StringReader(
            "Rindfleischüberwachungsgesetz Drahtschere abba")), hyphenator,

View Full Code Here

    if (reader == null) {
      // we gracefully die if we have no reader
      return;
    }


    HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
        .getHyphenationTree(reader);


    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
        new WhitespaceTokenizer(new StringReader(
            "Rindfleischüberwachungsgesetz")), hyphenator, dict,

View Full Code Here

    if (reader == null) {
      // we gracefully die if we have no reader
      return;
    }


    HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
        .getHyphenationTree(reader);


    Tokenizer wsTokenizer = new WhitespaceTokenizer(new StringReader(
        "Rindfleischüberwachungsgesetz"));
    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(

View Full Code Here

   * @return An object representing the hyphenation patterns
   * @throws Exception
   */
  public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
      throws Exception {
    HyphenationTree tree = new HyphenationTree();
    tree.loadPatterns(hyphenationSource);
    return tree;
  }

View Full Code Here

public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
  public void testHyphenationCompoundWordsDA() throws Exception {
    String[] dict = { "læse", "hest" };


    InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
    HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
        .getHyphenationTree(is);


    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, 
        new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false), 
        hyphenator,

View Full Code Here


  public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
    String[] dict = { "basketball", "basket", "ball", "kurv" };


    InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
    HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
        .getHyphenationTree(is);


    // the word basket will not be added due to the longest match option
    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, 
        new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),

View Full Code Here

   * With hyphenation-only, you can get a lot of nonsense tokens.
   * This can be controlled with the min/max subword size.
   */
  public void testHyphenationOnly() throws Exception {
    InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
    HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
        .getHyphenationTree(is);
    
    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
        TEST_VERSION_CURRENT,
        new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),

View Full Code Here


  public void testHyphenationCompoundWordsDA() throws Exception {
    CharArraySet dict = makeDictionary("læse", "hest");


    InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
    HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
        .getHyphenationTree(is);


    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, 
        new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false), 
        hyphenator,

View Full Code Here

0 1 2 3 4 5

TOP

Related Classes of org.apache.lucene.analysis.compound.hyphenation.HyphenationTree

org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter

org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter

org.xml.sax.InputSource

java.util.HashMap

java.util.ArrayList

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.