//*-- create the spell checker
//*-- This section runs only when spell checking is enabled in the crawl
//*-- configuration. It prepares the three objects handed to the spell checker
//*-- trainer further down: an edit distance, a language model and a tokenizer factory.
if (crawlConfig.isSpellCheck())
//*-- edit distance built from the fixed match / delete / insert / substitute / transpose weight constants
{ FixedWeightEditDistance fixedEdit = new FixedWeightEditDistance( MATCH_WEIGHT, DELETE_WEIGHT, INSERT_WEIGHT,
SUBSTITUTE_WEIGHT, TRANSPOSE_WEIGHT);
//*-- start with a newly constructed model (built with NGRAM_LENGTH); lm may be
//*-- replaced below by a model read from Constants.SPELL_TRAIN_MODEL
NGramProcessLM lm = new NGramProcessLM(NGRAM_LENGTH);
TokenizerFactory tokenizerFactory = new StandardBgramTokenizerFactory(false); //*-- do not extract entities
//*-- Reuse the saved language model when the model file exists and this is not
//*-- a fresh crawl; otherwise keep the newly constructed model. In both cases
//*-- the spell checker trainer is then built from the chosen model.
//*-- NOTE(review): if reading the model fails, sc is not assigned by this block;
//*-- confirm that later code tolerates an unset spell checker.
try {
  if (new File(Constants.SPELL_TRAIN_MODEL).exists() && !crawlConfig.isFreshCrawl()) {
    lm = readModel(Constants.SPELL_TRAIN_MODEL);
  }
  sc = new TrainSpellChecker(lm, fixedEdit, tokenizerFactory);
} catch (IOException ie) {
  //*-- pass the exception itself so the stack trace and cause reach the log
  logger.error("IO Error: Could not read spell train file " + ie.getMessage(), ie);
} catch (ClassNotFoundException ce) {
  //*-- pass the exception itself so the stack trace and cause reach the log
  logger.error("Class error: " + ce.getMessage(), ce);
}