Package edu.pitt.terminology.lexicon

Examples of edu.pitt.terminology.lexicon.Source


  private void parameterizeSources() {
    if (sources != null && !sources.isEmpty()) {
      final Source[] sourceFilters = new Source[sources.size()];
      int tdx = 0;
      for (String sab : sources) {
        sourceFilters[tdx++] = new Source(sab);
      }
      coder.setFilterSources(sourceFilters);
    } else {
      coder.setFilterLanguages(null);
    }
View Full Code Here


    // save meta information
    infoMap.put("name",ontology.getName());
    infoMap.put("descripion",ontology.getDescription());
    infoMap.put("version",ontology.getVersion());
    infoMap.put("uri",ontology.getURI().toASCIIString());
    Source src = Source.getSource(ontology.getName());
    src.setDescription(ontology.getDescription());
    sourceMap.put(ontology.getName(),src);
   
    // get all classes
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Iterating Over Ontology Classes ...");
    pcs.firePropertyChange(LOADING_TOTAL,null,0);
View Full Code Here

          i++;
          continue;
        }
        //http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.source_information_file__mrsabrrf/?report=objectonly
        String [] fields = line.split("\\|");
        Source src = new Source(fields[3].trim());
        src.setDescription(fields[4]); // fields.length-1
        if(filterSources != null && filterSources.contains(src.getName())){
          sourceMap.put(src.getName(),src);
        }
        i++;
        infoMap.put(RRFile,""+i);
      }
      r.close();
    }else{
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Skipping "+RRFile+" file ...");
    }
     
   
    // save meta information
    pcs.firePropertyChange(LOADING_MESSAGE,null,"Saving Meta Information ...");
    ((JDBMMap) infoMap).commit();
    ((JDBMMap) sourceMap).commit();
   
    // let's first build a map of concepts using the existing concept map
    useTempWordFolder = true;
    String prefNameSource = null;
    offset = 0;
    RRFile = "MRCONSO.RRF";
    if(!new File(dir,RRFile).exists())
      throw new TerminologyException("RRF file "+(new File(dir,RRFile).getAbsolutePath()+" does not exist!"));
   
    if(infoMap.containsKey(RRFile)){
      offset = Integer.parseInt(infoMap.get(RRFile));
    }
    // if offset is smaller than total, read file
    if(offset < rowCount.get(RRFile)){
      i = 0;
      pcs.firePropertyChange(LOADING_MESSAGE,null,"Loading "+RRFile+" file ...");
      pcs.firePropertyChange(LOADING_TOTAL,null,rowCount.get(RRFile));
      r = new BufferedReader(new FileReader(new File(dir,RRFile)));
      Concept previousConcept = null;
      //boolean crash = false;
      for(String line = r.readLine(); line != null; line = r.readLine()){
        if(i < offset){
          i++;
          continue;
        }
        // parse each line ref: http://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.concept_names_and_sources_file__m/?report=objectonly
        String [] fields = line.split("\\|");
        if(fields.length >= 14 ){
          String cui = fields[0].trim();
          String ts =  fields[2].trim();
          String src  = fields[11].trim();
          String text = fields[14].trim();
          String lang = fields[1].trim();
          String form = fields[12].trim();
          String code = fields[13].trim();
          String pref = fields[6].trim();
          String sup  = fields[16].trim();
         
          Source source = Source.getSource(src);
         
          // display progress bar
          if((i % 10000) == 0){
            pcs.firePropertyChange(LOADING_PROGRESS,null,i);
            ((JDBMMap) infoMap).commit();
View Full Code Here

  }
 
  public Source[] getSources() {
    if(sourceMap != null && !sourceMap.isEmpty())
      return sourceMap.values().toArray(new Source [0]);
    return new Source[]{new Source(getName(),getDescription(),""+getURI())};
  }
View Full Code Here

   
    // get child element
    for(Element e: XMLUtils.getChildElements(element)){
      if("Sources".equals(e.getTagName())){
        for(Element cc: XMLUtils.getElementsByTagName(e,"Source")){
          Source c = new Source("");
          c.fromElement(cc);
          sourceMap.put(c.getName(),c);
        }
      }else if("Relations".equals(e.getTagName())){
        //NOOP
      }else if("Languages".equals(e.getTagName())){
        infoMap.put("languages",e.getTextContent().trim());
      }else if("Roots".equals(e.getTagName())){
        for(String r: e.getTextContent().trim().split(",")){
          rootMap.put(r.trim(),"");
        }
      }else if("Description".equals(e.getTagName())){
        infoMap.put("description",e.getTextContent().trim());
      }else if("Concepts".equals(e.getTagName())){
        for(Element cc: XMLUtils.getElementsByTagName(e,"Concept")){
          Concept c = new Concept("");
          c.fromElement(cc);
          addConcept(c);
        }
      }else if("Options".equals(e.getTagName())){
        Properties p = new Properties();
        for(Element op: XMLUtils.getElementsByTagName(e,"Option")){
View Full Code Here

  public Map<String,Concept> loadOBO(File file) throws IOException, TerminologyException {
    Map<String,Concept> list = new LinkedHashMap<String,Concept>();
    String name = file.getName();
    if(name.endsWith(".obo"))
      name = name.substring(0,name.length()-4);
    Source src = Source.getSource(name);
   
    BufferedReader r = null;
    try{
      r = new BufferedReader(new FileReader(file));
      Concept c = null;
      Pattern p = Pattern.compile("\"(.*)\"\\s*([A-Z_]*)\\s*(.*)?\\[.*\\]");
      for(String l=r.readLine();l != null;l=r.readLine()){
        if("[Term]".equals(l.trim())){
          addConcept(list,c);
          c = new Concept("X");
          c.addSource(src);
        }else if(c != null){
          int i = l.indexOf(':');
          if(i > -1){
            String key = l.substring(0,i).trim();
            String val = l.substring(i+1).trim();
           
            // fill in values
            if("id".equals(key)){
              c.setCode(val);
            }else if("name".equals(key)){
              c.setSynonyms(new String [0]);
              c.setName(val);
              Term t = Term.getTerm(val);
              t.setPreferred(true);
              c.addTerm(t);
            }else if("namespace".equals(key)){
              c.addSemanticType(SemanticType.getSemanticType(val));
            }else if("def".equals(key)){
              Matcher m = p.matcher(val);
              if(m.matches())
                val = m.group(1);
              c.addDefinition(Definition.getDefinition(val));
            }else if(key != null && key.matches("(exact_|narrow_|broad_)?synonym")){
              Matcher m = p.matcher(val);
              String form = null;
              if(m.matches()){
                val = m.group(1);
                form = m.group(2);
              }
              Term t = Term.getTerm(val);
              if(form != null)
                t.setForm(form);
              c.addTerm(t);
            }else if("is_a".equals(key)){
              int j = val.indexOf("!");
              if(j > -1)
                val = val.substring(0,j).trim();
              c.addRelatedConcept(Relation.BROADER,val);
              Concept pr = list.get(val);
              if(pr != null)
                pr.addRelatedConcept(Relation.NARROWER,c.getCode());
            }else if("relationship".equals(key)){
              int j = val.indexOf("!");
              int k = val.indexOf(" ");
              if(k > -1){
                String rel = val.substring(0,k).trim();
                if(j > -1)
                  val = val.substring(k,j).trim();
                c.addRelatedConcept(Relation.getRelation(rel),val);
              }
            }else if("is_obsolete".equals(key)){
              if(Boolean.parseBoolean(val)){
                c = null;
              }
            }else if("consider".equals(key)){
              // NOOP only relevant when term is obsolete
            }else if("comment".equals(key)){
              // NOOP only relevant when term is obsolete
            }else if("alt_id".equals(key)){
              c.addCode(val,Source.getSource(""));
            }else if("subset".equals(key)){
              // NOOP, don't know what to do with that
            }else if("xref".equals(key)){
              // NOOP, handle external references
            }
          }
        }else if(l.startsWith("default-namespace:")){
          src.setDescription(l.substring("default-namespace:".length()+1).trim());
        }
      }
      addConcept(list,c);
    }catch(IOException ex){
      throw ex;
View Full Code Here

        mrcon.write(cui+I+lat+I+ts+I+lui+I+stt+I+sui+I+str+I+lrl+I+"\n");
        mrcon.flush();
       
        // save term information in MRSO
        // C0002871|L0002871|S0013742|SNOMEDCT|OP|154786001|9|
        Source src = t.getSource();
        if(src == null && c.getSources().length > 0)
          src = c.getSources()[0];
        String sab  = src != null?src.getName():"";
        String tty = t.isPreferred()?"PT":"NP";
        //String tty = t.getForm() != null && !compat?t.getForm():"SY";
        String scode = (c.getCodes() != null && src != null && c.getCodes().containsKey(src))?""+c.getCodes().get(src):cui;
       
        mrso.write(cui+I+lui+I+sui+I+sab+I+tty+I+scode+I+lrl+I+"\n");
        mrso.flush();
      }
     
      // save MRDEF information in MRDEF
      // C0002871|CSP|subnormal levels or function of erythrocytes, resulting in symptoms of tissue hypoxia.|
      for(Definition d: c.getDefinitions()){
        Source src = d.getSource();
        if(src == null && c.getSources().length > 0)
          src = c.getSources()[0];
        String sab = src != null?src.getName():"";
        String def = d.getDefinition();
        mrdef.write(cui+I+sab+I+def+I+"\n");
        mrdef.flush();
      }
      // save MRSTY information
      // C0000005|T121|Pharmacologic Substance||
      if(compat){
        String tui = "T071";
        String sty = "Entity";
        mrsty.write(cui+I+tui+I+sty+I+"\n");
        mrsty.flush();
      }else{
        for(SemanticType st: c.getSemanticTypes()){
          String tui = getTUI(st);
          String sty = st.getName();
          mrsty.write(cui+I+tui+I+sty+I+"\n");
          mrsty.flush();
        }
      }
      // add to sources
      Collections.addAll(sources,c.getSources())
     
      // save terminology
      if(term == null && c.getTerminology() != null)
        term = c.getTerminology();
     
      // keep track of translation
      codes.write(cui+"="+code+"\n");
      codes.flush();
    }
   
    // if possible use sources from terminology, because they have more data
    if(term != null && term.getSources().length > 0){
      Set<Source> tsources = new TreeSet<Source>();
      for(Source s: term.getSources()){
        if(sources.contains(s)){
          tsources.add(s);
        }
      }
      sources = tsources;
    }
   
    // write out sources
    // CL432995|C1140168|NCI2011_12E|NCI|National Cancer Institute Thesaurus, 2011_12E|NCI|2011_12E|||201112||
    // Sherri de Coronado;Center for Bioinformatics, National Cancer Institute;2115 E. Jefferson St.;6th Floor;
    // Rockville;MD;USA;20892-8335;925-377-5960;decorons@osp.nci.nih.gov|Sherri de Coronado;Center for Bioinformatics,
    // National Cancer Institute;2115 E. Jefferson St.;6th Floor;Rockville;MD;USA;20892-8335;925-377-5960;decorons@osp.nci.nih.gov
    // |0|205092|87562|FULL-NOSIB-MULTIPLE-IGNORE-RELA|AB,AD,BN,CA2,CA3,CCN,CCS,CNU,CSN,DN,FBD,HD,OP,PT,SY|
    // Accepted_Therapeutic_Use_For,BioCarta_ID,CAS_Registry,CHEBI_ID,Chemical_Formula,Contributing_Source,Design_Note,
    // EntrezGene_ID,Essential_Amino_Acid,Essential_Fatty_Acid,Extensible_List,FDA_Table,FDA_UNII_Code,GO_Annotation,
    // GenBank_Accession_Number,Gene_Encodes_Product,Homologous_Gene,ICD-O-3_Code,INFOODS,KEGG_ID,MGI_Accession_ID,
    // Macronutrient,Micronutrient,NCBI_Taxon_ID,NSC_Code,Neoplastic_Status,Nutrient,OMIM_Number,PDQ_Closed_Trial_Search_ID,
    // PDQ_Open_Trial_Search_ID,PID_ID,PubMedID_Primary_Reference,Swiss_Prot,Tolerable_Level,USDA_ID,US_Recommended_Intake,Unit,
    // Use_For,miRBase_ID|ENG|UTF-8|Y|Y|NCI Thesaurus|National Cancer Institute, National Institutes of Health;NCI Thesaurus;
    // Sherri de Coronado, decorons@osp.nci.nih.gov;December 2011, Protege version;Rockville, MD|
    for(Source src : sources){ 
      String def = src.getDescription();
      if(def != null){
        int x = def.indexOf(".");
        if(x > -1){
          def = def.substring(0,x);
        }
      }
      String vcui = "";
      String rcui = src.getCode() != null?src.getCode():"";
      String vsab = "";// versioned abbreviation
      String rsab = src.getName(); // abbreviation
      String son  = def != null?def:""; // official name
      String sf = src.getName(); // source family
      String sver = "";
      String vstart = "";
      String vend = "";
      String imeta ="";
      String rmeta = "";
View Full Code Here

      String code = c.getCode();
     
      // save term information in MRCONSO
      // C0000005|ENG|P|L0187013|PF|S2192303|Y|A4332670||M0019694|D012711|MSH|EN|D012711|(131)I-MAA|0|N||
      for(Term t: c.getTerms()){
        Source src = t.getSource();
        if(src == null && c.getSources().length > 0)
          src = c.getSources()[0];
        String lat = t.getLanguage() == null?"ENG":t.getLanguage();
        String ts =  t.isPreferred()?"P":"S";
        String lui = getLUI(t.getText());
        String stt = t.isPreferred()?"PF":"VO";
        String sui = getSUI(t.getText());
        String pref = t.isPreferred()?"Y":"N";
        String aui = String.format("A%07d",atomCount);
        String saui = "";
        String scui = "";
        String sdui = "";
        String sab  = src != null?src.getName():"";
        String tty = t.getForm() != null?t.getForm():"SY";
        String scode = (c.getCodes() != null && src != null && c.getCodes().containsKey(src))?""+c.getCodes().get(src):cui;
        String str = t.getText();
        String srl = "0";
        String sup  = "N";
        String cvf = "";
       
        // write out
        mrconso.write(cui+I+lat+I+ts+I+lui+I+stt+I+sui+I+pref+I+aui+I+saui+I+scui+I+sdui+I+sab+I+tty+I+scode+I+str+I+srl+I+sup+I+cvf+"\n");
        mrconso.flush();
        atomCount ++;
      }
      // save MRDEF information in MRDEF
      // C0000107|A3857241|AT22515555||MSH|An ANGIOTENSIN II analog which acts as a highly specific inhibitor of ANGIOTENSIN TYPE 1 RECEPTOR.|N||
      for(Definition d: c.getDefinitions()){
        Source src = d.getSource();
        if(src == null && c.getSources().length > 0)
          src = c.getSources()[0];
        String aui = "";
        String atui = "";
        String satui = "";
        String sab = src != null?src.getName():"";
        String def = d.getDefinition();
        String sup = "N";
        String cvf = "";
        mrdef.write(cui+I+aui+I+atui+I+satui+I+sab+I+def+I+sup+I+cvf+"\n");
        mrdef.flush();
      }
      // save MRSTY information
      // C0000005|T121|A1.4.1.1.1|Pharmacologic Substance|AT16627324||
      for(SemanticType st: c.getSemanticTypes()){
        String tui = getTUI(st);
        String stn = "";
        String sty = st.getName();
        String atui = "";
        String cvf = "";
        mrsty.write(cui+I+tui+I+stn+I+sty+I+atui+I+cvf+"\n");
        mrsty.flush();
      }
      // add to sources
      Collections.addAll(sources,c.getSources())
     
      // save terminology
      if(term == null && c.getTerminology() != null)
        term = c.getTerminology();
     
      // keep track of translation
      codes.write(cui+"="+code+"\n");
      codes.flush();
    }
   
    // if possible use sources from terminology, because they have more data
    if(term != null && term.getSources().length > 0){
      Set<Source> tsources = new TreeSet<Source>();
      for(Source s: term.getSources()){
        if(sources.contains(s)){
          tsources.add(s);
        }
      }
      sources = tsources;
    }
   
    // write out sources
    // CL432995|C1140168|NCI2011_12E|NCI|National Cancer Institute Thesaurus, 2011_12E|NCI|2011_12E|||201112||
    // Sherri de Coronado;Center for Bioinformatics, National Cancer Institute;2115 E. Jefferson St.;6th Floor;
    // Rockville;MD;USA;20892-8335;925-377-5960;decorons@osp.nci.nih.gov|Sherri de Coronado;Center for Bioinformatics,
    // National Cancer Institute;2115 E. Jefferson St.;6th Floor;Rockville;MD;USA;20892-8335;925-377-5960;decorons@osp.nci.nih.gov
    // |0|205092|87562|FULL-NOSIB-MULTIPLE-IGNORE-RELA|AB,AD,BN,CA2,CA3,CCN,CCS,CNU,CSN,DN,FBD,HD,OP,PT,SY|
    // Accepted_Therapeutic_Use_For,BioCarta_ID,CAS_Registry,CHEBI_ID,Chemical_Formula,Contributing_Source,Design_Note,
    // EntrezGene_ID,Essential_Amino_Acid,Essential_Fatty_Acid,Extensible_List,FDA_Table,FDA_UNII_Code,GO_Annotation,
    // GenBank_Accession_Number,Gene_Encodes_Product,Homologous_Gene,ICD-O-3_Code,INFOODS,KEGG_ID,MGI_Accession_ID,
    // Macronutrient,Micronutrient,NCBI_Taxon_ID,NSC_Code,Neoplastic_Status,Nutrient,OMIM_Number,PDQ_Closed_Trial_Search_ID,
    // PDQ_Open_Trial_Search_ID,PID_ID,PubMedID_Primary_Reference,Swiss_Prot,Tolerable_Level,USDA_ID,US_Recommended_Intake,Unit,
    // Use_For,miRBase_ID|ENG|UTF-8|Y|Y|NCI Thesaurus|National Cancer Institute, National Institutes of Health;NCI Thesaurus;
    // Sherri de Coronado, decorons@osp.nci.nih.gov;December 2011, Protege version;Rockville, MD|
    for(Source src : sources){ 
      String def = src.getDescription();
      if(def != null){
        int x = def.indexOf(".");
        if(x > -1){
          def = def.substring(0,x);
        }
      }
      String vcui = "";
      String rcui = src.getCode() != null?src.getCode():"";
      String vsab = "";// versioned abbreviation
      String rsab = src.getName(); // abbreviation
      String son  = def != null?def:""; // official name
      String sf = src.getName(); // source family
      String sver = "";
      String vstart = "";
      String vend = "";
      String imeta ="";
      String rmeta = "";
      String slc = "";
      String scc = "";
      String srl = "0";
      String tfr = ""; //term frequence
      String cfr = ""; // concept frequency
      String cxty = "";
      String ttyl = "";
      String atnl = "";
      String lat = "ENG";
      String cenc = "UTF-8";
      String curver = "Y";
      String sabin = "Y";
      String ssn = son;
      String scit= src.getDescription();
           
      mrsab.write(vcui+I+rcui+I+vsab+I+rsab+I+son+I+sf+I+sver+I+vstart+I+vend+I+imeta+I+rmeta+I+slc+I+scc+I+srl+I+tfr+I+cfr+
            I+cxty+I+ttyl+I+atnl+I+lat+I+cenc+I+curver+I+sabin+I+ssn+I+scit+"\n");
     
    }
View Full Code Here

        item.setType(TemplateItem.TYPE_ORGAN);
        String code = getCode((String) c.getPropertyValue(ont.getProperty("code")),true);
        if(code != null){
          String cd = (code.indexOf("#") > -1)?code.substring(0,code.lastIndexOf("#")):code;
          String nm = (cd.indexOf("/") > -1)?cd.substring(cd.lastIndexOf("/")+1):cd;
          Source src = new Source(nm, "", cd);
          try {
            template.getTerminology().lookupConcept(item.getConcept().getCode()).addCode(code,src);
          } catch (TerminologyException e) {
            e.printStackTrace();
          }
View Full Code Here

        .getConfigParameterValue("source.filter");
    if (!StringUtils.isEmpty(colonSeparatedSources)) {
      final String[] sabs = colonSeparatedSources.split(":");
      final Source[] sourceFilters = new Source[sabs.length];
      for (int tdx = 0; tdx < sabs.length; tdx++) {
        sourceFilters[tdx] = new Source(sabs[tdx]);
      }
      terminology.setFilterSources(sourceFilters);
    }
  }
View Full Code Here

TOP

Related Classes of edu.pitt.terminology.lexicon.Source

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.