Package edu.pitt.terminology.lexicon

Examples of edu.pitt.terminology.lexicon.Concept$TextLengthComparator


  /**
   * lookup from multiple terminologies
   */
  public Concept lookupConcept(String cui) throws TerminologyException {
    for(Terminology t: getTerminologies()){
      Concept c = t.lookupConcept(cui);
      if(c != null)
        return c;
    }
    return null;
  }
View Full Code Here


 
  /**
   * lookup concept object based on CUI
   */
  public Concept lookupConcept(String cui) {
    Concept c = (Concept) sender.sendObject(filter(new Parcel("lookup_concept",cui)));
    if(c == null)
      return null;
    c.setTerminology(this);
    return c;
  }
View Full Code Here

        // until other negation or stop word (retroactively)
        else if(token.isPostNegation() || token.isIndeterminate()){
          for(int j=0;j<negatableConcepts.size();j++){
            SentenceElement cns = negatableConcepts.get(j);
            if(cns.isConcept()){
              Concept lex = cns.getConcept();
              if(token.isPostNegation()){
                negatedConcepts.add(lex);
                Concept c = new Concept("ABSENT","Absent");
                //c.setText(token.getText());
                //c.setOffset(token.getOffset());
                Annotation.addAnnotation(c,token.getText(),token.getOffset());
                negations.add(c);
              }else{
                indeterminateConcepts.add(lex);
                Concept c = new Concept("INDETERMINATE","Indeterminate");
                //c.setText(token.getText());
                //c.setOffset(token.getOffset());
                Annotation.addAnnotation(c,token.getText(),token.getOffset());
                indeterminates.add(c);
              }
            }
          }
        }
       
        // clear retro concepts
        negatableConcepts.clear();
       
      // if token is stop word then stop the negation process
      }else if (token.isStopWord()){
        negationConcept = null;
        negatableConcepts.clear();
        window = 0;
      // do the actual negation here
      }else if (token.isConcept()){
        if(negationConcept != null){
          Concept lex = token.getConcept();
          negatedConcepts.add(lex);
          Concept c = new Concept("ABSENT","Absent");
          //c.setText(negationConcept.getText());
          //c.setOffset(negationConcept.getOffset());
          Annotation.addAnnotation(c,negationConcept.getText(),negationConcept.getOffset());
          negations.add(c);
        }else{
View Full Code Here

   * scan list and see if it matches a string
   */
  private Concept findConceptMatch(String origtext,List<Concept> list){
    String text = origtext.toLowerCase();
    for(int i=0;i<list.size();i++){
       Concept term = list.get(i);
       if(term.getText() == null)
         continue;
       String term_text = term.getText().toLowerCase().replaceAll("\\W"," ");
       if(text.length() > term.getText().length()){
          try{
           if(text.matches(term_text+"\\b.*")){
              return term;
           }
          }catch(Exception ex){
            //If we have something funny in term_text that will be misinterpreted by regex
           if(text.startsWith(term_text)){
              return term;
          }
          }
       }else if(text.length() == term.getText().length()){
         if(text.startsWith(term_text))
           return term;
        
       }
    }
View Full Code Here

        offset = offset+t.length(); // update offset
        continue;
      }
             
      // check if it is a recognized concept
      Concept key = findConceptMatch(tmp,conceptKeys);
      //System.err.println(tmp+" "+conceptKeys+" "+key);
      if(key != null){
        // create new element
        tokens.add(new SentenceElement(lex.getCharOffset(),key.getText(),key));
        offset = offset+key.getText().length(); // update offset
        continue;
      }
     
      // else it is just a token
      tokens.add(new SentenceElement(lex.getCharOffset(),lex.getTrimmedString()));
View Full Code Here

      this.is_pre = is_pre;
    }
  }
 
  public static void main(String [] args ){
    Concept c = new Concept("ulcer");
    c.setText("ulcerated");
    c.setOffset(4);
    NegEx n = new NegEx();
    n.process("non-ulcerated",Collections.singletonList(c));
    System.out.println(n.getNegatedConcepts());
  }
View Full Code Here

        String expanded = m.group(1);
        String acronym  = m.group(m.groupCount());
        expanded = getAcronymExapndedForm(expanded,acronym);
        // don't match to single words acronyms and don't match digits
        if(expanded != null && acronym.length() > 1 && !acronym.matches("\\d+")){
          Concept exp = null;
          List<Concept> acr = new ArrayList<Concept>();
          // find annotations assigned to expanded part of the acronym
          for(Concept c: r){
            if(matches(c,expanded))
              exp = c;
            else if(matches(c,acronym))
              acr.add(c);
          }
          // if expanded form was matched as a single concept
          if(exp != null){
            // fix annotations
            exp.addMatchedTerm(acronym);
            exp.setAnnotations(null);
            exp.setText(null);
            // save acronym with expanded form code
            getAcronyms().put(acronym,exp.getCode());
            // if there was a different acronym selected, then remove them
            for(Concept a: acr){
              if(!a.getCode().equals(exp.getCode()))
                concepts.remove(a);
            }
          }
        }
      }else{
        // check if acronyms exist
        for(String acronym: getAcronyms().keySet()){
          m = Pattern.compile("\\b"+acronym+"\\b").matcher(phrase);
          while(m.find()){
            // remove an already matched one
            for(Concept c: concepts){
              if(c.getMatchedTerm().equals(acronym)){
                concepts.remove(c);
                break;
              }
            }
           
            // add new concept for this acronym
            Concept c = new Concept(getAcronyms().get(acronym),acronym);
            c.setTerminology(terminology);
            c.setSearchString(phrase);
            c.setMatchedTerm(acronym);
            concepts.add(c);
          }
        }
      }
      r = concepts.toArray(new Concept [0]);
    }
   
    if(skipAbbrreviationLogic)
      return r;
   
    // look at abbreviations
    Set<String> acronyms = new HashSet<String>();
    for(Concept a: getAbbreviations().search(phrase)){
      acronyms.add(a.getName());
    }
 
    // don't do anything if nothing found
    if(acronyms.isEmpty())
      return r;
   
    // if abbreviations found
    Set<Concept> list = new LinkedHashSet<Concept>();
    for(Concept c: r){
      // add only what is not in the list
      if(!acronyms.contains(c.getMatchedTerm().toLowerCase())){
        list.add(c);
      }
    }
    // add abbreviations that are in whitelist
    for(String txt: acronyms){
      if(getAbbreviationWhitelist().containsKey(txt)){
        String cui = getAbbreviationWhitelist().get(txt);
        Concept c1 = getTerminology().lookupConcept(cui);
        if(c1 == null)
          c1 = getAbbreviations().lookupConcept(cui);
        if(c1 != null){
          c1.setSearchString(phrase);
          c1.setMatchedTerm(txt);
          list.add(c1);
        }
      }
    }
   
View Full Code Here

      // change ter
      if(preferred == null)
        preferred = terms.iterator().next();
     
      // set up concept
      Concept c = new Concept(cui);
      c.setTerminology(this);
      if(preferred != null)
        c.setName(preferred.getText());
      c.setSemanticTypes(semanticTypes.toArray(new SemanticType [0]));
      c.setSynonyms(synonyms.toArray(new String [0]));
      c.setSources(sources.toArray(new Source [0]));
      c.setTerms(terms.toArray(new Term [0]));
      c.setDefinitions(definitions.toArray(new Definition [0]));
      for(String code: codes.keySet())
        c.addCode(code,codes.get(code));
      c.setInitialized(true);
     
      return c;
     
    }catch(Exception ex){
      throw new TerminologyException("Error: Problem with lookup of "+cui,ex);
View Full Code Here

      IClass cls = (IClass)it.next();
      String code = getCode(cls);
      if(conceptMap.containsKey(code))
        continue;
     
      Concept concept = cls.getConcept();
      concept.setCode(code);
     
      // fix sources
      for(Source sr: concept.getSources())
        sr.setCode(getCode(sr.getCode()));
     
      // add relations to concept
      for(IClass c: cls.getDirectSuperClasses()){
        concept.addRelatedConcept(Relation.BROADER,getCode(c));
      }
     
      // add relations to concept
      for(IClass c: cls.getDirectSubClasses()){
        concept.addRelatedConcept(Relation.NARROWER,getCode(c));
      }
           
      // add concept
      addConcept(concept);
     
View Full Code Here

    if(offset < rowCount.get(RRFile)){
      i = 0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      Concept previousConcept = null;
      //boolean crash = false;
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.concept_names_and_sources_file__m/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 14 ){
          String cui = fields[0].trim();
          String ts =  fields[2].trim();
          String src  = fields[11].trim();
          String text = fields[14].trim();
          String lang = fields[1].trim();
          String form = fields[12].trim();
          String code = fields[13].trim();
          String pref = fields[6].trim();
          String sup  = fields[16].trim();
         
          Source source = Source.getSource(src);
         
          // display progress bar
          if((i % 10000) == 0){
            pcs.firePropertyChange(LOADING_PROGRESS,null,i);
            ((JDBMMap) infoMap).commit();
            ((JDBMMap) termMap).commit();
            ((JDBMMap) regexMap).commit();
            ((JDBMMap) conceptMap).commit();
            /*if(i > 0 && i % 500000 == 0){
              crash = true;
            }*/
          }
          i++;
         
          // filter out by language
          if(filterLang != null && !filterLang.contains(lang))
            continue;
         
          // filter out by source
          if(filterSources != null && !filterSources.contains(src)){
            if(!(code.startsWith("V-") && filterSources.contains(code.substring(2))))
              continue;
          }
         
          // honor suppress flag
          if("O".equals(sup))
            continue;
         
          // get concept from map
          Concept c = convertConcept(conceptMap.get(cui));
          if(c == null){
            // if concept is not in map, see if previous is it
            if(previousConcept != null && previousConcept.getCode().equals(cui)){
              c = previousConcept;
            }else{
              c = new Concept(cui,text);
              prefNameSource = null;
            }
          }
         
          // create a term
          Term term = new Term(text);
          term.setForm(form);
          term.setLanguage(lang);
          term.setSource(source);
          if("y".equalsIgnoreCase(pref) && "P".equalsIgnoreCase(ts))
            term.setPreferred(true);
         
          // add to concept
          c.addSynonym(text);
          c.addSource(source);
          c.addTerm(term);
          c.addCode(code, source);
         
          // set preferred name for the first time
          if(term.isPreferred()){
            // if preferred name source is not set OR
            // we have filtering and the new source offset is less than the old source offset (which means higher priority)
            if(prefNameSource == null || (filterSources != null && filterSources.indexOf(src) < filterSources.indexOf(prefNameSource))){
              c.setName(text);
              prefNameSource = src;
             
            }
          }
          term = null;
         
          // now see if we pretty much got the entire concept and should put it in
          if(previousConcept != null && !previousConcept.getCode().equals(cui)){
            addConcept(previousConcept);
            infoMap.put("max.terms.per.word",""+maxTermsPerWord);
            infoMap.put("total.terms.per.word",""+totalTermsPerWord);
            /*if(crash)
              crash();*/
          }
          previousConcept = c;
        }
        infoMap.put(RRFile,""+i);
     
      }
      // save last one
      if(previousConcept != null)
        addConcept(previousConcept);
      r.close();
    }else{
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
    }
   
    // commit info terms and regex
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Saving Term Information ...");
    ((JDBMMap) infoMap).commit();
    ((JDBMMap) termMap).commit();
    ((JDBMMap) regexMap).commit();
    ((JDBMMap) conceptMap).commit();
   
    // now do temp word dir
    File tempDir = new File(location,TEMP_WORD_DIR);
    if(useTempWordFolder && tempDir.exists()){
      useTempWordFolder = false;
      File [] files = tempDir.listFiles();
      offset = 0;
      RRFile = TEMP_WORD_DIR;
      if(infoMap.containsKey(RRFile)){
        offset = Integer.parseInt(infoMap.get(RRFile));
      }
      // if offset is smaller then total, read file
      if(offset < files.length){
        pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading temporary word files ...");
        pcs.firePropertyChange(LOADING_TOTAL,null,files.length);
        i = 0;
        for(File f: files){
          if(i < offset){
            i++;
            continue;
          }
          // display progress bar
          if((i % (files.length/100)) == 0){
            pcs.firePropertyChange(LOADING_PROGRESS,null,i);
          }
          i++;
         
          //load file content
          String word = f.getName();
          Set<String> terms = new HashSet<String>();
          BufferedReader rd = new BufferedReader(new FileReader(f));
          for(String l = rd.readLine();l != null; l = rd.readLine()){
            terms.add(l.trim());
          }
          rd.close();
         
          // set words
          setWordTerms(word,terms);
          infoMap.put(RRFile,""+i);
        }
      }else{
        pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
      }
    }
   
    // save some meta information
    infoMap.put("word.count",""+wordMap.size());
    infoMap.put("term.count",""+termMap.size());
    infoMap.put("concept.count",""+conceptMap.size());
    infoMap.put("average.terms.per.word",""+totalTermsPerWord/wordMap.size());
    infoMap.put("max.terms.per.word",""+maxTermsPerWord);
   
    // good time to save term info
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Saving Word Information ...");
    ((JDBMMap) infoMap).commit();
    ((JDBMMap) wordMap).commit();
    ((JDBMMap) wordStatMap).commit();
   
    // lets go over definitions
    offset = 0;
    RRFile = "MRDEF.RRF";
    if(infoMap.containsKey(RRFile)){
      offset = Integer.parseInt(infoMap.get(RRFile));
    }
   
    if(!new File(dir,RRFile).exists()){
      pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
      offset = Integer.MAX_VALUE;
    }
     
    // if offset is smaller then total, read file
    if(offset < rowCount.get(RRFile)){
      i = 0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.definitions_file__mrdefrrf/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 5 ){
          String cui = fields[0].trim();
          String src = fields[4].trim();
          String text = fields[5].trim();
         
          Definition d = Definition.getDefinition(text);
          d.setSource(Source.getSource(src));
         
          // get concept from map
          Concept c = convertConcept(conceptMap.get(cui));
          if(c != null){
            c.addDefinition(d);
            // replace with new concept
            conceptMap.put(cui,c.getContent());
          }
          if((i % 10000) == 0)
            pcs.firePropertyChange(LOADING_PROGRESS,null,i);
        }
        i++;
        infoMap.put(RRFile,""+i);
      }
      r.close();
    }else{
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
    }
   
    // go over semantic types
    offset = 0;
    RRFile = "MRSTY.RRF";
    if(infoMap.containsKey(RRFile)){
      offset = Integer.parseInt(infoMap.get(RRFile));
    }
    if(!new File(dir,RRFile).exists()){
      pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
      offset = Integer.MAX_VALUE;
    }
    // if offset is smaller then total, read file
    if(offset < rowCount.get(RRFile)){
      i=0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.definitions_file__mrdefrrf/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 3 ){
          String cui = fields[0].trim();
          String tui = fields[1].trim();
          String text = fields[3].trim();
         
          // get concept from map
          Concept c = convertConcept(conceptMap.get(cui));
          if(c != null){
            // filter out by source
            if(filterSemTypes != null && !filterSemTypes.contains(text)){
              removeConcept(c);
            }else{
              c.addSemanticType(new SemanticType(text,tui));
              // replace with new concept
              conceptMap.put(cui,c.getContent());
            }
          }
        }
        if((i % 10000) == 0)
          pcs.firePropertyChange(LOADING_PROGRESS,null,i);
        i++;
        infoMap.put(RRFile,""+i);
      }
      r.close();
    }else{
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
    }
   
    //process relationships?
    offset = 0;
    RRFile = "MRREL.RRF";
    if(infoMap.containsKey(RRFile)){
      offset = Integer.parseInt(infoMap.get(RRFile));
    }
    if(!new File(dir,RRFile).exists()){
      pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
      offset = Integer.MAX_VALUE;
    }
    // if offset is smaller then total, read file
    if(offset < rowCount.get(RRFile)){
      i=0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      List<String> filterRelations = Arrays.asList("RB","RN","PAR","CHD");
      //Concept previousConcept = null;
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.definitions_file__mrdefrrf/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 5 ){
          String cui1 = fields[0].trim();
          String cui2 = fields[4].trim();
          String rel = fields[3].trim();
          String src = fields[10].trim();
         
          // filter by known source if
          if(relationSources != null && !relationSources.contains(src))
            continue;
         
          // filter by known relationship
          if(filterRelations.contains(rel) && !cui1.equals(cui2)){
            Relation re = null;
            if("RB".equals(rel) || "PAR".equals(rel))
              re = Relation.BROADER;
            else if("RN".equals(rel) || "CHD".equals(rel))
              re = Relation.NARROWER;
           
            // get concept from map
            Concept c = convertConcept(conceptMap.get(cui1));
            if(c != null && re != null){
              c.addRelatedConcept(re,cui2);
              // replace with new concept
              conceptMap.put(cui1,c.getContent());
            }
          } 
        }
        if((i % 10000) == 0)
          pcs.firePropertyChange(LOADING_PROGRESS,null,i);
View Full Code Here

TOP

Related Classes of edu.pitt.terminology.lexicon.Concept$TextLengthComparator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.