Package com.atilika.kuromoji.dict

Examples of com.atilika.kuromoji.dict.UnknownDictionary


    public UnknownDictionaryBuilder(String encoding) {
    this.encoding = encoding;
  }
 
  public UnknownDictionary build(String dirname) throws IOException {
    UnknownDictionary unkDictionary = null;
    unkDictionary = readDictionaryFile(dirname + File.separator + "unk.def")//Should be only one file
    readCharacterDefinition(dirname + File.separator + "char.def", unkDictionary);
    return unkDictionary;
  }
View Full Code Here


    return readDictionaryFile(filename, encoding);
  }

  public UnknownDictionary readDictionaryFile(String filename, String encoding)
    throws IOException {
    UnknownDictionary dictionary = new UnknownDictionary(5 * 1024 * 1024);
   
    FileInputStream inputStream = new FileInputStream(filename);
    InputStreamReader streamReader = new InputStreamReader(inputStream, encoding);
    LineNumberReader lineReader = new LineNumberReader(streamReader);
   
    dictionary.put(CSVUtil.parse(NGRAM_DICTIONARY_ENTRY));

    String line = null;
    while ((line = lineReader.readLine()) != null) {
      dictionary.put(CSVUtil.parse(line)); // Probably we don't need to validate entry
    }
   
    lineReader.close();
    return dictionary;
  }
View Full Code Here

    }

    private void buildUnknownWordDictionary(String inputDirname, String outputDirname, String encoding) throws IOException {
        System.out.print("building unknown word dict...");
        UnknownDictionaryBuilder unkBuilder = new UnknownDictionaryBuilder(encoding);
        UnknownDictionary unkDictionary = unkBuilder.build(inputDirname);
        unkDictionary.write(outputDirname);
        System.out.println("done");
    }
View Full Code Here

TOP

Related Classes of com.atilika.kuromoji.dict.UnknownDictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.