Package data.cerevisiae.test.compseq

Source Code of data.cerevisiae.test.compseq.SeqComposition

/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/

package data.cerevisiae.test.compseq;

import fork.lib.base.file.FileName;
import fork.lib.base.file.io.txt.ReadTable;
import fork.lib.base.file.management.Dirs;
import fork.lib.bio.anno.genomic.BedReader;
import fork.lib.bio.anno.genomic.region.DirectionalGenomicRegion;
import fork.lib.bio.anno.genomic.region.GenomicRegion;
import fork.lib.bio.anno.genomic.region.GenomicRegionsBuilder;
import fork.lib.bio.seq.GetSequenceFasta;
import fork.lib.bio.seq.NucleotideSequenceParser;
import fork.lib.math.applied.stat.FrequencyCount;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

/**
*
* @author man-mqbpjmg4
*/
public class SeqComposition {
   
protected File bedf;
protected HashMap<String,GetSequenceFasta> chrGet= new HashMap<>();
protected HashMap<String,GenomicRegion> idReg;
   
   
    public SeqComposition(File bedf)throws Exception {
        this.bedf=bedf;
        init();
    }
   
protected void init() throws Exception {
    File dir= new File("E:\\muxingu\\mystudy\\phd\\files\\anno\\genomes\\sacCer1");
    File[] fs= dir.listFiles();
    for( int i=0; i<fs.length; i++ ){
        File f= fs[i];
        String fn= FileName.getBaseName(f);
        if(fn.indexOf("chr")==0){
            chrGet.put(fn, new GetSequenceFasta(f));;
        }
    }
    idReg= new BedReader(bedf).getIDToRegionMap();
}
   
   
   
public void printOutput(ArrayList<String> ids)throws Exception {
    FrequencyCount<Character> fc= new FrequencyCount<>();

    for( int i=0; i<ids.size(); i++ ){
        String id = ids.get(i);
        DirectionalGenomicRegion gr= (DirectionalGenomicRegion) idReg.get(id);
        if(gr==null){
            continue;
        }
        GetSequenceFasta gs= chrGet.get(gr.chr);
        if(gs!=null){
            String seq= gs.getSequence(gr);
            if(gr.isOnReverseStrand()){
                seq= NucleotideSequenceParser.parseSequence(seq).complementary().toString();
            }
            for( int j=0; j<seq.length(); j++ ){
                fc.add(seq.charAt(j));
            }
        }
    }
    int tot= fc.totalCounts();
    Object[] ks= fc.getSortedKeys();
    for( int i=0; i<ks.length; i++ ){
        Object k= ks[i];
        int c= fc.getFrequencyCounts().get(k);
        System.out.println("   "+k+" "+ (double)Math.round((double)c/tot*1000)/1000+"   "+ c);
    }
    System.out.println();
}
   








public static void main(String[] args) throws Exception { //debug
    File dir= Dirs.getFile("dir");
   
    File bed= new File(dir+"/anno/bed/sacCer1-xu/sacCer1-xu_gene_five_0.150.bed");
   
    SeqComposition cc= new SeqComposition(bed);
   
   
    File d= new File("E:\\muxingu\\mystudy\\phd\\progs\\PhdProject\\plot");
   
    //cc.printOutput(new ReadTable(new File(d+"/xxx_cz5.txt")).getTable().getColumn(0));
    //cc.printOutput(new ReadTable(new File(d+"/xxx_cnz5.txt")).getTable().getColumn(0));
    cc.printOutput(new ReadTable(bed).getTable().getColumn(3));
   
   
    cc= new SeqComposition(new File(dir+"/anno/bed/sacCer1-xu/sacCer1-xu_gene_cds_300.300.bed"));
    cc.printOutput(new ReadTable(bed).getTable().getColumn(3));
}
   
   
   
}
TOP

Related Classes of data.cerevisiae.test.compseq.SeqComposition

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.