// Load the concept names file named by RRFile, unless the saved offset says it
// was already fully read. Rows are grouped into Concept objects by CUI; a concept
// is handed to addConcept() once a row with a different CUI is seen.
if(offset < rowCount.get(RRFile)){
    i = 0;
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
    pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
    r = new BufferedReader(new FileReader(new File(dir,RRFile)));
    try{
        Concept previousConcept = null;
        for(String line = r.readLine(); line != null; line = r.readLine()){
            // fast-forward over rows handled by a previous (interrupted) run
            if(i < offset){
                i++;
                continue;
            }
            // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.concept_names_and_sources_file__m/?report=objectonly
            String [] fields = line.split("\\|");
            // the highest index read below is 16, so at least 17 columns are required;
            // shorter rows are ignored instead of failing with ArrayIndexOutOfBoundsException
            // NOTE(review): rows rejected here do not advance i, unlike the skip logic above
            // which counts every line - confirm whether resume offsets can drift on malformed rows
            if(fields.length >= 17 ){
                String cui = fields[0].trim();
                String ts = fields[2].trim();
                String src = fields[11].trim();
                String text = fields[14].trim();
                String lang = fields[1].trim();
                String form = fields[12].trim();
                String code = fields[13].trim();
                String pref = fields[6].trim();
                String sup = fields[16].trim();
                Source source = Source.getSource(src);
                // every 10000 rows: report progress and commit the maps
                if((i % 10000) == 0){
                    pcs.firePropertyChange(LOADING_PROGRESS,null,i);
                    ((JDBMMap) infoMap).commit();
                    ((JDBMMap) termMap).commit();
                    ((JDBMMap) regexMap).commit();
                    ((JDBMMap) conceptMap).commit();
                }
                i++;
                // filter out by language
                if(filterLang != null && !filterLang.contains(lang)){
                    continue;
                }
                // filter out by source; a row from an unlisted source is still kept when its
                // code has the form "V-<name>" and <name> is one of the listed sources
                if(filterSources != null && !filterSources.contains(src)){
                    if(!(code.startsWith("V-") && filterSources.contains(code.substring(2)))){
                        continue;
                    }
                }
                // honor suppress flag
                if("O".equals(sup)){
                    continue;
                }
                // get concept from map
                Concept c = convertConcept(conceptMap.get(cui));
                if(c == null){
                    // if concept is not in map, see if previous is it
                    if(previousConcept != null && previousConcept.getCode().equals(cui)){
                        c = previousConcept;
                    }else{
                        c = new Concept(cui,text);
                        prefNameSource = null;
                    }
                }
                // create a term
                Term term = new Term(text);
                term.setForm(form);
                term.setLanguage(lang);
                term.setSource(source);
                if("y".equalsIgnoreCase(pref) && "P".equalsIgnoreCase(ts)){
                    term.setPreferred(true);
                }
                // add to concept
                c.addSynonym(text);
                c.addSource(source);
                c.addTerm(term);
                c.addCode(code, source);
                // set preferred name for the first time
                if(term.isPreferred()){
                    // if preferred name source is not set OR
                    // we have filtering and the new source offset is less than old source offset (which means higher priority)
                    // NOTE(review): if indexOf() returns -1 for a src admitted only through the "V-" rule above,
                    // that row outranks every listed source - confirm this is intended
                    if(prefNameSource == null || (filterSources != null && filterSources.indexOf(src) < filterSources.indexOf(prefNameSource))){
                        c.setName(text);
                        prefNameSource = src;
                    }
                }
                // now see if we pretty much got the entire concept and should put it in
                if(previousConcept != null && !previousConcept.getCode().equals(cui)){
                    addConcept(previousConcept);
                    infoMap.put("max.terms.per.word",""+maxTermsPerWord);
                    infoMap.put("total.terms.per.word",""+totalTermsPerWord);
                }
                previousConcept = c;
            }
            // remember how far we got
            infoMap.put(RRFile,""+i);
        }
        // save last one
        if(previousConcept != null){
            addConcept(previousConcept);
        }
    }finally{
        // release the file handle even when parsing or storing fails
        r.close();
    }
}else{
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
}
// announce the save step, then commit the info, term, regex and concept maps in that order
pcs.firePropertyChange(LOADING_MESSAGE,null,"Saving Term Information ...");
for(Object store: new Object[]{infoMap,termMap,regexMap,conceptMap}){
    ((JDBMMap) store).commit();
}
// now do temp word dir: every file in it is named after a word and lists that
// word's terms one per line; each file is read and passed to setWordTerms()
File tempDir = new File(location,TEMP_WORD_DIR);
if(useTempWordFolder && tempDir.exists()){
    useTempWordFolder = false;
    File [] files = tempDir.listFiles();
    // listFiles() yields null when the path is not a directory or cannot be read;
    // treat that the same as an empty folder instead of failing with a NullPointerException
    if(files == null){
        files = new File[0];
    }
    offset = 0;
    RRFile = TEMP_WORD_DIR;
    if(infoMap.containsKey(RRFile)){
        offset = Integer.parseInt(infoMap.get(RRFile));
    }
    // if offset is smaller than total, read files
    // NOTE(review): resuming by offset relies on listFiles() returning the same order on every run - confirm
    if(offset < files.length){
        pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading temporary word files ...");
        pcs.firePropertyChange(LOADING_TOTAL,null,files.length);
        // report roughly every 1%; never let the step be 0, which made the
        // modulo below throw ArithmeticException for folders with fewer than 100 files
        int progressStep = Math.max(1, files.length/100);
        i = 0;
        for(File f: files){
            // fast-forward over files handled by a previous run
            if(i < offset){
                i++;
                continue;
            }
            // display progress bar
            if((i % progressStep) == 0){
                pcs.firePropertyChange(LOADING_PROGRESS,null,i);
            }
            i++;
            // load file content
            String word = f.getName();
            Set<String> terms = new HashSet<String>();
            BufferedReader rd = new BufferedReader(new FileReader(f));
            try{
                for(String l = rd.readLine();l != null; l = rd.readLine()){
                    terms.add(l.trim());
                }
            }finally{
                // release the file handle even when reading fails
                rd.close();
            }
            // set words
            setWordTerms(word,terms);
            infoMap.put(RRFile,""+i);
        }
    }else{
        pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
    }
}
// save some meta information: sizes of the word, term and concept maps plus
// the terms-per-word statistics
infoMap.put("word.count",""+wordMap.size());
infoMap.put("term.count",""+termMap.size());
infoMap.put("concept.count",""+conceptMap.size());
// guard the average against an empty word map so that nothing is divided by zero
infoMap.put("average.terms.per.word",""+(wordMap.size() > 0 ? totalTermsPerWord/wordMap.size() : 0));
infoMap.put("max.terms.per.word",""+maxTermsPerWord);
// good time to save word info
pcs.firePropertyChange(LOADING_MESSAGE,null,"Saving Word Information ...");
((JDBMMap) infoMap).commit();
((JDBMMap) wordMap).commit();
((JDBMMap) wordStatMap).commit();
// lets go over definitions: each MRDEF.RRF row attaches a Definition to the
// concept already stored under its CUI; rows for unknown CUIs are ignored
offset = 0;
RRFile = "MRDEF.RRF";
if(infoMap.containsKey(RRFile)){
    offset = Integer.parseInt(infoMap.get(RRFile));
}
if(!new File(dir,RRFile).exists()){
    pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
    // force the "already loaded" branch below
    offset = Integer.MAX_VALUE;
}
// if offset is smaller than total, read file
if(offset < rowCount.get(RRFile)){
    i = 0;
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
    pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
    r = new BufferedReader(new FileReader(new File(dir,RRFile)));
    try{
        for(String line = r.readLine(); line != null; line = r.readLine()){
            // fast-forward over rows handled by a previous run
            if(i < offset){
                i++;
                continue;
            }
            // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.definitions_file__mrdefrrf/?report=objectonly
            String [] fields = line.split("\\|");
            // the highest index read below is 5, so at least 6 columns are required;
            // shorter rows are ignored instead of failing with ArrayIndexOutOfBoundsException
            if(fields.length >= 6 ){
                String cui = fields[0].trim();
                String src = fields[4].trim();
                String text = fields[5].trim();
                Definition d = Definition.getDefinition(text);
                d.setSource(Source.getSource(src));
                // get concept from map
                Concept c = convertConcept(conceptMap.get(cui));
                if(c != null){
                    c.addDefinition(d);
                    // replace with new concept
                    conceptMap.put(cui,c.getContent());
                }
                if((i % 10000) == 0){
                    pcs.firePropertyChange(LOADING_PROGRESS,null,i);
                }
            }
            i++;
            // remember how far we got
            infoMap.put(RRFile,""+i);
        }
    }finally{
        // release the file handle even when parsing or storing fails
        r.close();
    }
}else{
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
}
// go over semantic types: each MRSTY.RRF row either adds a SemanticType to the
// concept stored under its CUI, or removes that concept when the type is filtered out
offset = 0;
RRFile = "MRSTY.RRF";
if(infoMap.containsKey(RRFile)){
    offset = Integer.parseInt(infoMap.get(RRFile));
}
if(!new File(dir,RRFile).exists()){
    pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
    // force the "already loaded" branch below
    offset = Integer.MAX_VALUE;
}
// if offset is smaller than total, read file
if(offset < rowCount.get(RRFile)){
    i=0;
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
    pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
    r = new BufferedReader(new FileReader(new File(dir,RRFile)));
    try{
        for(String line = r.readLine(); line != null; line = r.readLine()){
            // fast-forward over rows handled by a previous run
            if(i < offset){
                i++;
                continue;
            }
            // parse each semantic type row: column 0 is the CUI, column 1 the TUI, column 3 the type name
            String [] fields = line.split("\\|");
            // the highest index read below is 3, so at least 4 columns are required;
            // shorter rows are ignored instead of failing with ArrayIndexOutOfBoundsException
            if(fields.length >= 4 ){
                String cui = fields[0].trim();
                String tui = fields[1].trim();
                String text = fields[3].trim();
                // get concept from map
                Concept c = convertConcept(conceptMap.get(cui));
                if(c != null){
                    // filter out by semantic type
                    if(filterSemTypes != null && !filterSemTypes.contains(text)){
                        removeConcept(c);
                    }else{
                        c.addSemanticType(new SemanticType(text,tui));
                        // replace with new concept
                        conceptMap.put(cui,c.getContent());
                    }
                }
            }
            if((i % 10000) == 0){
                pcs.firePropertyChange(LOADING_PROGRESS,null,i);
            }
            i++;
            // remember how far we got
            infoMap.put(RRFile,""+i);
        }
    }finally{
        // release the file handle even when parsing or storing fails
        r.close();
    }
}else{
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
}
//process relationships?
offset = 0;
RRFile = "MRREL.RRF";
if(infoMap.containsKey(RRFile)){
offset = Integer.parseInt(infoMap.get(RRFile));
}
if(!new File(dir,RRFile).exists()){
pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
offset = Integer.MAX_VALUE;
}
// if offset is smaller then total, read file
if(offset < rowCount.get(RRFile)){
i=0;
pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
r = new BufferedReader(new FileReader(new File(dir,RRFile)));
List<String> filterRelations = Arrays.asList("RB","RN","PAR","CHD");
//Concept previousConcept = null;
for(String line = r.readLine(); line != null; line = r.readLine()){
if(i < offset){
i++;
continue;
}
// parse each relationship row: column 0 is the first CUI, column 3 the relation,
// column 4 the second CUI and column 10 the source
String [] fields = line.split("\\|");
// the highest index read below is 10, so at least 11 columns are required;
// shorter rows are ignored instead of failing with ArrayIndexOutOfBoundsException
if(fields.length >= 11 ){
    String cui1 = fields[0].trim();
    String cui2 = fields[4].trim();
    String rel = fields[3].trim();
    String src = fields[10].trim();
    // filter by known source
    // NOTE(review): this continue also bypasses the progress update that follows this block - confirm
    // that any per-line counter increment after it is not skipped as well
    if(relationSources != null && !relationSources.contains(src)){
        continue;
    }
    // filter by known relationship, ignoring rows that relate a concept to itself
    if(filterRelations.contains(rel) && !cui1.equals(cui2)){
        Relation re = null;
        if("RB".equals(rel) || "PAR".equals(rel)){
            re = Relation.BROADER;
        }else if("RN".equals(rel) || "CHD".equals(rel)){
            re = Relation.NARROWER;
        }
        // get concept from map
        Concept c = convertConcept(conceptMap.get(cui1));
        if(c != null && re != null){
            c.addRelatedConcept(re,cui2);
            // replace with new concept
            conceptMap.put(cui1,c.getContent());
        }
    }
}
if((i % 10000) == 0)
pcs.firePropertyChange(LOADING_PROGRESS,null,i);