SetupAuiFirstWord setupFword = new SetupAuiFirstWord();
setupFword.setupAuiFirstWord();
}
/**
 * Builds first-word rows for AUI strings, batch by batch, and inserts them
 * through the {@link UMLSDao}.
 * <p>
 * Processing resumes from the value returned by
 * {@code umlsDao.getLastAui()}; each batch is fetched with
 * {@code umlsDao.getAllAuiStr(lastAui)}, and the loop ends when a fetch
 * returns no rows. Rows whose string is null or too long, that fail to
 * tokenize, or whose tokenized output exceeds the length limits are logged
 * and skipped; they never abort the run.
 */
public void setupAuiFirstWord() {
    // Length limits applied below.
    // NOTE(review): these presumably mirror database column widths - confirm.
    final int maxStrLength = 200;
    final int maxFwordLength = 70;
    final int maxTokenizedStrLength = 250;

    UMLSDao umlsDao = KernelContextHolder.getApplicationContext().getBean(
            UMLSDao.class);
    // NOTE(review): this template is configured but never used below.
    // Confirm whether the batch insert was meant to run inside it. It is
    // kept so that a missing PlatformTransactionManager bean still fails
    // at startup exactly as before.
    TransactionTemplate t = new TransactionTemplate(KernelContextHolder
            .getApplicationContext().getBean(
                    PlatformTransactionManager.class));
    t.setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW);

    // delete all records
    // umlsDao.deleteAuiFirstWord();

    // restart processing after the last aui we processed.
    // if this is null, then just process everything
    String lastAui = umlsDao.getLastAui();

    // fetch the first batch of (aui, str) rows; a null result is treated
    // like an empty one so it ends the loop instead of throwing
    List<Object[]> listAuiStr = umlsDao.getAllAuiStr(lastAui);
    while (listAuiStr != null && !listAuiStr.isEmpty()) {
        // collect the aui - fword pairs of this batch
        List<UmlsAuiFirstWord> listFword = new ArrayList<UmlsAuiFirstWord>(
                listAuiStr.size());
        for (Object[] auiStr : listAuiStr) {
            String aui = (String) auiStr[0];
            String str = (String) auiStr[1];
            // remember our position even for rows that end up skipped, so
            // the next fetch continues after them
            lastAui = aui;
            if (str == null) {
                // previously this dereferenced null outside the try block
                // and aborted the whole run
                log.error("Skipping aui because str is null: aui=" + aui);
                continue;
            }
            if (str.length() >= maxStrLength) {
                if (log.isDebugEnabled()) {
                    log.debug("Skipping aui because str to long: aui=" + aui
                            + ", str=" + str);
                }
                continue;
            }
            try {
                UmlsAuiFirstWord fw = this.tokenizeStr(aui, str);
                if (fw == null) {
                    log.error("Error tokenizing aui=" + aui + ", str="
                            + str);
                } else if (fw.getFword().length() > maxFwordLength) {
                    if (log.isDebugEnabled()) {
                        log.debug("fword too long: aui=" + aui + ", str="
                                + fw.getFword());
                    }
                } else if (fw.getTokenizedStr().length() > maxTokenizedStrLength) {
                    if (log.isDebugEnabled()) {
                        log.debug("string too long: aui=" + aui + ", str="
                                + str);
                    }
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug("aui=" + aui + ", fw=" + fw);
                    }
                    listFword.add(fw);
                }
            } catch (Exception e) {
                // a single bad row must not stop the batch: log and move on
                log.error("Error tokenizing aui=" + aui + ", str="
                        + str, e);
            }
        }
        // batch insert
        if (!listFword.isEmpty()) {
            umlsDao.insertAuiFirstWord(listFword);
            log.info("inserted " + listFword.size() + " rows");
        }
        // fetch the next batch, continuing from the last aui seen
        listAuiStr = umlsDao.getAllAuiStr(lastAui);
    }
}