Package edu.pitt.dbmi.nlp.noble.terminology

Examples of edu.pitt.dbmi.nlp.noble.terminology.Concept


        }
      }else if("Description".equals(e.getTagName())){
        storage.getInfoMap().put("description",e.getTextContent().trim());
      }else if("Concepts".equals(e.getTagName())){
        for(Element cc: XMLUtils.getElementsByTagName(e,"Concept")){
          Concept c = new Concept("");
          c.fromElement(cc);
          addConcept(c);
        }
      }else if("Options".equals(e.getTagName())){
        Properties p = new Properties();
        for(Element op: XMLUtils.getElementsByTagName(e,"Option")){
View Full Code Here


           
      // create
      List<Concept> termConcepts = new ArrayList<Concept>();
      double score = getDefaultScore(term,oterm,resultTerms);
      for(String code: codes){
        Concept c = convertConcept(code);
        if(c != null){
          c.setInitialized(true);
        }else{
          c = new Concept(code,term);
        }
        // clone
        c = c.clone();
        c.setTerminology(this);
        c.addMatchedTerm(oterm);
        c.setSearchString(text);
       
        if(ignoreAcronyms && isAcronym(c))
          continue;
     
        scoreConcept(c,term,score);
       
        // filter out really bad ones
        if(!scoreConcepts || c.getScore() >= 0.5)
          termConcepts.add(c);
      }
      // add to results
      for(Concept c: getBestCandidates(termConcepts)){
        if(!isFilteredOut(c)){
View Full Code Here

 
  public static void main(String [] args) throws Exception{
    TextTools.isCommonWord("");
    NobleCoderTerminology t = new NobleCoderTerminology();
    t.setIgnoreDigits(false);
    Concept n = new Concept("Number");
    n.addSynonym("/\\d+/");
    Concept a  = new Concept("Cat");
    a.addSynonym("kitten");
    a.addSynonym("big cat");
    t.addConcept(n);
    t.addConcept(a);
   
    String text = "The 2 kittens walked with a cat to a dog and killed 1 big fucking cat and  there was another cat";
    //t.search(text);
View Full Code Here

    BioPortalTerminology term = new BioPortalTerminology();
    term.setOntology("NCI_Thesaurus");
    long time = System.currentTimeMillis();
    // ZFA_0001234 | C0025202
    System.out.println("--- lookup ---");
    Concept c = term.lookupConcept("C0025202");
    if(c != null){
      c.printInfo(System.out);
    }
   
    System.out.println("lookup time "+(System.currentTimeMillis()-time));
   
    System.out.println("--- search ---");
View Full Code Here

  public Concept getConcept() {
    //return  new Concept(getId(),getName());
    if(concept == null){
      load();
      concept = new Concept(this);
     
      // add codes
      if(properties.containsKey(CODE)){
        Object val = properties.get(CODE);
        int i = 0;
View Full Code Here

    Source src = Source.getSource(name);
   
    BufferedReader r = null;
    try{
      r = new BufferedReader(new FileReader(file));
      Concept c = null;
      Pattern p = Pattern.compile("\"(.*)\"\\s*([A-Z_]*)\\s*(.*)?\\[.*\\]");
      for(String l=r.readLine();l != null;l=r.readLine()){
        if("[Term]".equals(l.trim())){
          addConcept(list,c);
          c = new Concept("X");
          c.addSource(src);
        }else if(c != null){
          int i = l.indexOf(':');
          if(i > -1){
            String key = l.substring(0,i).trim();
            String val = l.substring(i+1).trim();
           
            // fill in values
            if("id".equals(key)){
              c.setCode(val);
            }else if("name".equals(key)){
              c.setSynonyms(new String [0]);
              c.setName(val);
              Term t = Term.getTerm(val);
              t.setPreferred(true);
              c.addTerm(t);
            }else if("namespace".equals(key)){
              c.addSemanticType(SemanticType.getSemanticType(val));
            }else if("def".equals(key)){
              Matcher m = p.matcher(val);
              if(m.matches())
                val = m.group(1);
              c.addDefinition(Definition.getDefinition(val));
            }else if(key != null && key.matches("(exact_|narrow_|broad_)?synonym")){
              Matcher m = p.matcher(val);
              String form = null;
              if(m.matches()){
                val = m.group(1);
                form = m.group(2);
              }
              Term t = Term.getTerm(val);
              if(form != null)
                t.setForm(form);
              c.addTerm(t);
            }else if("is_a".equals(key)){
              int j = val.indexOf("!");
              if(j > -1)
                val = val.substring(0,j).trim();
              c.addRelatedConcept(Relation.BROADER,val);
              Concept pr = list.get(val);
              if(pr != null)
                pr.addRelatedConcept(Relation.NARROWER,c.getCode());
            }else if("relationship".equals(key)){
              int j = val.indexOf("!");
              int k = val.indexOf(" ");
              if(k > -1){
                String rel = val.substring(0,k).trim();
View Full Code Here

      IClass cls = (IClass)it.next();
      String code = getCode(cls,truncateURI);
      if(storage.getConceptMap().containsKey(code))
        continue;
     
      Concept concept = cls.getConcept();
      concept.setCode(code);
     
      // fix sources
      for(Source sr: concept.getSources())
        sr.setCode(getCode(sr.getCode(),truncateURI));
     
      // add relations to concept
      for(IClass c: cls.getDirectSuperClasses()){
        concept.addRelatedConcept(Relation.BROADER,getCode(c,truncateURI));
      }
     
      // add relations to concept
      for(IClass c: cls.getDirectSubClasses()){
        concept.addRelatedConcept(Relation.NARROWER,getCode(c,truncateURI));
      }
           
      // add concept
      term.addConcept(concept);
     
View Full Code Here

    storage.getInfoMap().put("name",name);
    storage.getInfoMap().put("location",file.getAbsolutePath());
    //storage.getInfoMap().put("stem.words",""+stemWords);
   
    BufferedReader reader = new BufferedReader(new FileReader(file));
    Concept c = new Concept("_");
    List<String> synonyms = new ArrayList<String>();
    int code = 0;
    for(String line=reader.readLine();line != null; line=reader.readLine()){
      line = line.trim();
     
      // junk is a concept delimeter
      if(line.length() == 0 || line.matches("_+") || line.matches("\\d+")){
        // add previous concept
        if(c != null && synonyms != null && synonyms.size() > 0){
          c.setName(synonyms.get(0));
          c.setCode(""+(++code));
          c.setSynonyms(synonyms.toArray(new String [0]));
          term.addConcept(c);
        }
        // start new concept
        c = new Concept("_");
        synonyms = new ArrayList<String>();
      }else{
        synonyms.add(line);
      }
    }
    reader.close();
   
    // handle last concept
    if(c != null && synonyms != null && synonyms.size() > 0){
      c.setName(synonyms.get(0));
      c.setCode(""+(++code));
      c.setSynonyms(synonyms.toArray(new String [0]));
      term.addConcept(c);
    }
   
    // save terminology
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Saving Concept Information ...");
View Full Code Here

    if(offset < rowCount.get(RRFile)){
      i = 0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      Concept previousConcept = null;
      //boolean crash = false;
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.concept_names_and_sources_file__m/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 14 ){
          String cui = fields[0].trim();
          String ts =  fields[2].trim();
          String src  = fields[11].trim();
          String text = fields[14].trim();
          String lang = fields[1].trim();
          String form = fields[12].trim();
          String code = fields[13].trim();
          String pref = fields[6].trim();
          String sup  = fields[16].trim();
         
          Source source = Source.getSource(src);
         
          // display progress bar
          if((i % 10000) == 0){
            pcs.firePropertyChange(LOADING_PROGRESS,null,i);
            storage.commit(storage.getInfoMap());
            storage.commit(storage.getTermMap());
            storage.commit(storage.getRegexMap());
            storage.commit(storage.getConceptMap());
            /*if(i > 0 && i % 500000 == 0){
              crash = true;
            }*/
          }
          i++;
         
          // filter out by language
          if(filterLang != null && !filterLang.contains(lang))
            continue;
         
          // filter out by source
          if(filterSources != null && !filterSources.contains(src)){
            if(!(code.startsWith("V-") && filterSources.contains(code.substring(2))))
              continue;
          }
         
          // honor suppress flag
          if("O".equals(sup))
            continue;
         
          // get concept from map
          Concept c = terminology.convertConcept(storage.getConceptMap().get(cui));
          if(c == null){
            // if concept is not in map, see if previous is it
            if(previousConcept != null && previousConcept.getCode().equals(cui)){
              c = previousConcept;
            }else{
              c = new Concept(cui,text);
              prefNameSource = null;
            }
          }
         
          // create a term
          Term term = new Term(text);
          term.setForm(form);
          term.setLanguage(lang);
          term.setSource(source);
          if("y".equalsIgnoreCase(pref) && "P".equalsIgnoreCase(ts))
            term.setPreferred(true);
         
          // add to concept
          c.addSynonym(text);
          c.addSource(source);
          c.addTerm(term);
          c.addCode(code, source);
         
          // set preferred name for the first time
          if(term.isPreferred()){
            // if prefered name source is not set OR
            // we have filtering and the new source offset is less then old source offset (which means higher priority)
            if(prefNameSource == null || (filterSources != null && filterSources.indexOf(src) < filterSources.indexOf(prefNameSource))){
              c.setName(text);
              prefNameSource = src;
             
            }
          }
          term = null;
         
          // now see if we pretty much got the entire concept and should put it in
          if(previousConcept != null && !previousConcept.getCode().equals(cui)){
            terminology.addConcept(previousConcept);
            storage.getInfoMap().put("max.terms.per.word",""+storage.maxTermsPerWord);
            storage.getInfoMap().put("total.terms.per.word",""+storage.totalTermsPerWord);
            /*if(crash)
              crash();*/
          }
          previousConcept = c;
        }
        storage.getInfoMap().put(RRFile,""+i);
     
      }
      // save last one
      if(previousConcept != null)
        terminology.addConcept(previousConcept);
      r.close();
    }else{
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
    }
   
    // commit info terms and regex
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Saving Term Information ...");
    storage.commit(storage.getInfoMap());
    storage.commit(storage.getTermMap());
    storage.commit(storage.getRegexMap());
    storage.commit(storage.getConceptMap());

    // now do temp word dir
    File tempDir = new File(storage.getLocation(),NobleCoderTerminology.TEMP_WORD_DIR);
    if(storage.useTempWordFolder && tempDir.exists()){
      storage.useTempWordFolder = false;
      File [] files = tempDir.listFiles();
      offset = 0;
      RRFile = NobleCoderTerminology.TEMP_WORD_DIR;
      if(storage.getInfoMap().containsKey(RRFile)){
        offset = Integer.parseInt(storage.getInfoMap().get(RRFile));
      }
      // if offset is smaller then total, read file
      if(offset < files.length){
        pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading temporary word files ...");
        pcs.firePropertyChange(LOADING_TOTAL,null,files.length);
        i = 0;
        for(File f: files){
          if(i < offset){
            i++;
            continue;
          }
          // display progress bar
          if((i % (files.length/100)) == 0){
            pcs.firePropertyChange(LOADING_PROGRESS,null,i);
          }
          i++;
         
          //load file content
          String word = f.getName();
          Set<String> terms = new HashSet<String>();
          BufferedReader rd = new BufferedReader(new FileReader(f));
          for(String l = rd.readLine();l != null; l = rd.readLine()){
            terms.add(l.trim());
          }
          rd.close();
         
          // set words
          terminology.setWordTerms(word,terms);
          storage.getInfoMap().put(RRFile,""+i);
        }
      }else{
        pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
      }
    }
   
    // save some meta information
    storage.getInfoMap().put("word.count",""+storage.getWordMap().size());
    storage.getInfoMap().put("term.count",""+storage.getTermMap().size());
    storage.getInfoMap().put("concept.count",""+storage.getConceptMap().size());
    storage.getInfoMap().put("average.terms.per.word",""+storage.totalTermsPerWord/storage.getWordMap().size());
    storage.getInfoMap().put("max.terms.per.word",""+storage.maxTermsPerWord);
   
    // good time to save term info
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Saving Word Information ...");
   
    storage.commit(storage.getInfoMap());
    storage.commit(storage.getWordMap());
    storage.commit(storage.getWordStatMap());
   
    // lets go over definitions
    offset = 0;
    RRFile = "MRDEF.RRF";
    if(storage.getInfoMap().containsKey(RRFile)){
      offset = Integer.parseInt(storage.getInfoMap().get(RRFile));
    }
   
    if(!new File(dir,RRFile).exists()){
      pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
      offset = Integer.MAX_VALUE;
    }
     
    // if offset is smaller then total, read file
    if(offset < rowCount.get(RRFile)){
      i = 0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.definitions_file__mrdefrrf/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 5 ){
          String cui = fields[0].trim();
          String src = fields[4].trim();
          String text = fields[5].trim();
         
          Definition d = Definition.getDefinition(text);
          d.setSource(Source.getSource(src));
         
          // get concept from map
          Concept c = terminology.convertConcept(storage.getConceptMap().get(cui));
          if(c != null){
            c.addDefinition(d);
            // replace with new concept
            storage.getConceptMap().put(cui,c.getContent());
          }
          if((i % 10000) == 0)
            pcs.firePropertyChange(LOADING_PROGRESS,null,i);
        }
        i++;
        storage.getInfoMap().put(RRFile,""+i);
      }
      r.close();
    }else{
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
    }
   
    // go over semantic types
    offset = 0;
    RRFile = "MRSTY.RRF";
    if(storage.getInfoMap().containsKey(RRFile)){
      offset = Integer.parseInt(storage.getInfoMap().get(RRFile));
    }
    if(!new File(dir,RRFile).exists()){
      pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
      offset = Integer.MAX_VALUE;
    }
    // if offset is smaller then total, read file
    if(offset < rowCount.get(RRFile)){
      i=0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.definitions_file__mrdefrrf/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 3 ){
          String cui = fields[0].trim();
          String tui = fields[1].trim();
          String text = fields[3].trim();
         
          // get concept from map
          Concept c = terminology.convertConcept(storage.getConceptMap().get(cui));
          if(c != null){
            // filter out by source
            if(filterSemTypes != null && !filterSemTypes.contains(text)){
              terminology.removeConcept(c);
            }else{
              c.addSemanticType(SemanticType.getSemanticType(text,tui));
              // replace with new concept
              storage.getConceptMap().put(cui,c.getContent());
            }
          }
        }
        if((i % 10000) == 0)
          pcs.firePropertyChange(LOADING_PROGRESS,null,i);
        i++;
        storage.getInfoMap().put(RRFile,""+i);
      }
      r.close();
    }else{
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
    }
   
    //process relationships?
    offset = 0;
    RRFile = "MRREL.RRF";
    if(storage.getInfoMap().containsKey(RRFile)){
      offset = Integer.parseInt(storage.getInfoMap().get(RRFile));
    }
    if(!new File(dir,RRFile).exists()){
      pcs.firePropertyChange(LOADING_MESSAGE,null,"RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist, sipping .."));
      offset = Integer.MAX_VALUE;
    }
    // if offset is smaller then total, read file
    if(offset < rowCount.get(RRFile)){
      i=0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      List<String> filterRelations = Arrays.asList("RB","RN","PAR","CHD");
      //Concept previousConcept = null;
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.definitions_file__mrdefrrf/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 5 ){
          String cui1 = fields[0].trim();
          String cui2 = fields[4].trim();
          String rel = fields[3].trim();
          String src = fields[10].trim();
         
          // filter by known source if
          if(relationSources != null && !relationSources.contains(src))
            continue;
         
          // filter by known relationship
          if(filterRelations.contains(rel) && !cui1.equals(cui2)){
            Relation re = null;
            if("RB".equals(rel) || "PAR".equals(rel))
              re = Relation.BROADER;
            else if("RN".equals(rel) || "CHD".equals(rel))
              re = Relation.NARROWER;
           
            // get concept from map
            Concept c = terminology.convertConcept(storage.getConceptMap().get(cui1));
            if(c != null && re != null){
              c.addRelatedConcept(re,cui2);
              // replace with new concept
              storage.getConceptMap().put(cui1,c.getContent());
            }
          } 
        }
        if((i % 10000) == 0)
          pcs.firePropertyChange(LOADING_PROGRESS,null,i);
View Full Code Here

      IOntology ont = onts[0];
      System.out.println(ont.getRoot()+" : "+Arrays.toString(ont.getRootClasses()));
      IClass [] clses = new IClass [] { ont.getClass("Melanoma") };
      for(IClass cls :clses){
        System.out.println(cls+" "+cls.getLocation());
        Concept c = cls.getConcept();
        c.printInfo(System.out);
      }
    }
  }
View Full Code Here

TOP

Related Classes of edu.pitt.dbmi.nlp.noble.terminology.Concept

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.