class Bio::BFRTools::BFRRegion

Constants

BASES

Attributes

avg_cov_bulk_1[R]
avg_cov_bulk_2[R]
bases_bulk_1[R]
bases_bulk_2[R]
bulk_1_sequence[R]
bulk_2_sequence[R]
coverages_1[R]
coverages_2[R]
parental_1_sequence[R]
parental_2_sequence[R]
ratios_bulk_1[R]
ratios_bulk_2[R]
snp_count[R]

Public Class Methods

new(opts)
# File lib/bio/BFRTools.rb, line 111
# Parses opts[:region], copies its coordinates onto this object and loads the
# consensus sequences, base counts, base ratios and coverages of that region
# from the four alignments exposed by opts[:container].
#
# opts - Hash of options:
#        :region       - handed to Bio::DB::Fasta::Region.parse_region; the
#                        parsed result replaces it before fetching.
#        :container    - must respond to parental_1_sam, parental_2_sam,
#                        bulk_1_sam and bulk_2_sam.
#        :min_cov      - defaults to 20 when absent.
#        :max_snp_1kbp - defaults to 5 when absent.
#        The merged Hash is passed unchanged to every fetch_region call.
def initialize(opts)
  defaults = { :min_cov => 20, :max_snp_1kbp => 5 }
  opts = defaults.merge(opts)

  parsed = Bio::DB::Fasta::Region.parse_region(opts[:region])
  self.entry = parsed.entry
  self.start = parsed.start
  self.end = parsed.end

  # Cache slot read by bfrs; nil means "not computed yet".
  @BFRs = nil

  # From here on the options carry the parsed region rather than the raw value.
  opts[:region] = parsed
  @container = opts[:container]

  # Fetch order is kept: parental 1, parental 2, bulk 1, bulk 2.
  alignments = [:parental_1_sam, :parental_2_sam, :bulk_1_sam, :bulk_2_sam]
  parental_1, parental_2, bulk_1, bulk_2 = alignments.map do |alignment|
    @container.public_send(alignment).fetch_region(opts)
  end

  @parental_1_sequence = parental_1.consensus
  @parental_2_sequence = parental_2.consensus
  @bulk_1_sequence = bulk_1.consensus
  @bulk_2_sequence = bulk_2.consensus

  @snp_count = Container.snps_between(@parental_1_sequence, @parental_2_sequence)

  @ratios_bulk_1, @ratios_bulk_2 = bulk_1.base_ratios, bulk_2.base_ratios
  @bases_bulk_1, @bases_bulk_2 = bulk_1.bases, bulk_2.bases
  @avg_cov_bulk_1, @avg_cov_bulk_2 = bulk_1.average_coverage, bulk_2.average_coverage
  @coverages_1, @coverages_2 = bulk_1.coverages, bulk_2.coverages
end

Public Instance Methods

base_count_for_base(base, base_matrix)
# File lib/bio/BFRTools.rb, line 349
# Collects the value stored under +base+ in every element of +base_matrix+.
#
# base        - key looked up in each element.
# base_matrix - indexable collection responding to size and []; each element
#               must respond to [] as well (presumably one Hash of counts per
#               position - TODO confirm against the caller).
#
# Returns an Array with one value per element of base_matrix, in order.
def base_count_for_base(base, base_matrix)
  (0...base_matrix.size).map { |position| base_matrix[position][base] }
end
base_ratios_for_base(base, ratios_matrix)
# File lib/bio/BFRTools.rb, line 341
# Extracts, for a single +base+, the value held by every element of
# +ratios_matrix+.
#
# base          - key looked up in each element.
# ratios_matrix - indexable collection responding to size and []; each element
#                 must respond to [] as well (presumably one Hash of ratios
#                 per position - TODO confirm against the caller).
#
# Returns an Array with one value per element of ratios_matrix, in order.
def base_ratios_for_base(base, ratios_matrix)
  Array.new(ratios_matrix.size) { |index| ratios_matrix[index][base] }
end
bfrs()
# File lib/bio/BFRTools.rb, line 193
# Computes, once, the ratio between the two bulks for every base at every
# position and caches the result in @BFRs.
#
# For each position i and each base in BASES the value read from
# @ratios_bulk_1[i] is compared with the one read from @ratios_bulk_2[i]:
#   * both zero      -> 0 in both tables
#   * only bulk 1 is -> 0 under :first, Float::INFINITY under :second
#   * only bulk 2 is -> Float::INFINITY under :first, 0 under :second
#   * otherwise      -> bulk_1 / bulk_2 under :first, bulk_2 / bulk_1 under :second
#
# Returns a Hash { :first => { base => Array }, :second => { base => Array } }
# with one entry per position; later calls return the same cached object.
def bfrs
  return @BFRs if @BFRs

  # Build into a local first: if any position raises, nothing half-filled is
  # left behind in @BFRs to be served by the next call.
  table = {}
  [:first, :second].each do |reference|
    table[reference] = {}
    BASES.each { |base| table[reference][base] = [] }
  end

  (0...size).each do |i|
    ratios_1 = @ratios_bulk_1[i]
    ratios_2 = @ratios_bulk_2[i]
    BASES.each do |base|
      ratio_1 = ratios_1[base]
      ratio_2 = ratios_2[base]

      if ratio_1 == 0 && ratio_2 == 0
        bfr1 = 0
        bfr2 = 0
      elsif ratio_1 == 0
        bfr1 = 0
        bfr2 = Float::INFINITY
      elsif ratio_2 == 0
        bfr1 = Float::INFINITY
        bfr2 = 0
      else
        # NOTE(review): plain `/` - this is only a true ratio if the stored
        # values are Floats; confirm what base_ratios returns.
        bfr1 = ratio_1 / ratio_2
        bfr2 = ratio_2 / ratio_1
      end

      table[:first][base] << bfr1
      table[:second][base] << bfr2
    end
  end

  @BFRs = table
end
get_bfr_line(position, base, reference)
# File lib/bio/BFRTools.rb, line 231
# Formats one tab-separated record for +base+ at +position+.
#
# position  - index into the per-position arrays and consensus sequences.
# base      - key used for the ratio tables; base.to_sym is used for the count
#             tables.
# reference - :first or :second. With :first the informative name is
#             parental 1 and the reported base comes from the parental 2
#             consensus; with :second it is the other way round.
#
# Columns, in order: parental 1 name, parental 2 name, bulk 1 name, bulk 2
# name, entry, reported base, start + position, base, BFR (2 decimals),
# coverage 1, coverage 2, informative name, ratio 1 and ratio 2 (2 decimals),
# base count 1, base count 2.
#
# Returns the record as a String without a trailing newline.
# Raises BFRToolsException for any other +reference+.
def get_bfr_line(position, base, reference)
  case reference
  when :first
    informative = @container.parental_1_name
    ref_base = @parental_2_sequence[position]
  when :second
    informative = @container.parental_2_name
    ref_base = @parental_1_sequence[position]
  else
    raise BFRToolsException.new("The reference for the line should be :first or :second, but was " + reference.to_s)
  end

  coordinate = self.start + position
  ratio_between_bulks = bfrs[reference][base][position]
  coverage_1 = @coverages_1[position]
  coverage_2 = @coverages_2[position]
  bulk_1_ratio = @ratios_bulk_1[position][base]
  bulk_2_ratio = @ratios_bulk_2[position][base]
  bulk_1_count = @bases_bulk_1[position][base.to_sym]
  bulk_2_count = @bases_bulk_2[position][base.to_sym]

  columns = [
    @container.parental_1_name,
    @container.parental_2_name,
    @container.bulk_1_name,
    @container.bulk_2_name,
    self.entry,
    ref_base,
    coordinate.to_s,
    base.to_s,
    ratio_between_bulks.round(2).to_s,
    coverage_1.to_s,
    coverage_2.to_s,
    informative,
    bulk_1_ratio.round(2).to_s,
    bulk_2_ratio.round(2).to_s,
    bulk_1_count.to_s,
    bulk_2_count.to_s
  ]

  # Appended with << onto a fresh String, so the caller's name strings are
  # never mutated.
  first_column, *remaining = columns
  remaining.inject(String.new << first_column) { |line, column| line << "\t" << column }
end
get_bfr_lines(opts = {})
# File lib/bio/BFRTools.rb, line 150
# Builds the BFR records for every informative base of the region.
#
# A position is considered only when both coverages_1 and coverages_2 are
# strictly greater than opts[:min_cov]. At such a position a base is reported
# with reference :first when Bio::NucleicAcid.is_valid accepts it for the
# parental 1 consensus base but not for the parental 2 one, and with :second
# in the opposite case.
#
# opts - Hash of options; :min_cov defaults to 20 and :max_snp_1kbp to 5
#        (the latter is not read by this method).
#
# Each record is also written to standard output as it is produced.
#
# Returns a String holding the records from get_bfr_line, each terminated by
# exactly one "\n" (empty when nothing qualifies).
def get_bfr_lines(opts = {})
  opts = { :min_cov => 20, :max_snp_1kbp => 5 }.merge(opts)
  # NOTE(review): looks like a leftover debug trace; kept so stdout output is
  # unchanged for existing callers - confirm and remove.
  p opts.inspect
  lines = String.new

  (0...size).each do |i|
    next unless coverages_1[i] > opts[:min_cov] && coverages_2[i] > opts[:min_cov]

    BASES.each do |base|
      in_parental_1 = Bio::NucleicAcid.is_valid(parental_1_sequence[i], base.to_s)
      in_parental_2 = Bio::NucleicAcid.is_valid(parental_2_sequence[i], base.to_s)

      informative = []
      informative << :first if in_parental_1 && !in_parental_2
      informative << :second if in_parental_2 && !in_parental_1

      informative.each do |reference|
        record = get_bfr_line(i, base, reference)
        # Print without appending to the record: the previous
        # `puts l << "\n"` mutated it, so every record in the returned
        # text was followed by a blank line.
        puts record
        lines << record << "\n"
      end
    end
  end
  lines
end
snp_1kbp()
# File lib/bio/BFRTools.rb, line 189
# Scales @snp_count to a rate per 1000 positions of this region.
#
# Returns a Float: @snp_count * 1000 / size.
def snp_1kbp
  snps_times_thousand = @snp_count.to_f * 1000
  snps_times_thousand / self.size.to_f
end
to_csv()
# File lib/bio/BFRTools.rb, line 311
# Renders the region as comma-separated text, one labelled row per line.
#
# Rows, in order: the four sample names, the positions 1..size, the parental
# and bulk 1 consensus (one character per cell), bulk 1 coverage, the bulk 1
# counts and ratios for every base in BASES, the same block for bulk 2, and
# finally the :first table of bfrs for every base.
#
# NOTE(review): label spacing is irregular ("Bulk 1, ", "Bases Bulk 1A",
# "Bases Bulk 2   A", "BFRsA"); it is reproduced exactly because consumers may
# match on these labels.
#
# Returns the text as a String; every row ends with "\n".
def to_csv
  csv = String.new
  add_row = lambda { |label, cells| csv << label << cells << "\n" }

  add_row.call("Parental 1,", @container.parental_1_name)
  add_row.call("Parental 2,", @container.parental_2_name)
  add_row.call("Bulk 1, ", @container.bulk_1_name)
  add_row.call("Bulk 2,", @container.bulk_2_name)
  add_row.call("Positions,", (1..self.size).to_a.join(","))
  add_row.call("Parental 1 consensus,", @parental_1_sequence.split(//).join(","))
  add_row.call("Parental 2 consensus,", @parental_2_sequence.split(//).join(","))

  add_row.call("Bulk 1 consensus,", @bulk_1_sequence.split(//).join(","))
  add_row.call("Bulk 1 coverage,", @coverages_1.join(","))
  BASES.each do |base|
    add_row.call("Bases Bulk 1#{base},", base_count_for_base(base, @bases_bulk_1).join(","))
    add_row.call("Ratios Bulk 1 #{base},", base_ratios_for_base(base, @ratios_bulk_1).join(","))
  end

  add_row.call("Bulk 2 consensus,", @bulk_2_sequence.split(//).join(","))
  add_row.call("Bulk 2 coverage,", @coverages_2.join(","))
  BASES.each do |base|
    add_row.call("Bases Bulk 2   #{base},", base_count_for_base(base, @bases_bulk_2).join(","))
    add_row.call("Ratios Bulk 2 #{base},", base_ratios_for_base(base, @ratios_bulk_2).join(","))
  end

  BASES.each do |base|
    add_row.call("BFRs#{base},", bfrs[:first][base].join(","))
  end

  csv
end
to_json(opts)
# File lib/bio/BFRTools.rb, line 274
# Serialises the region as a single JSON object.
#
# The previous implementation concatenated the members without separating
# commas, wrote the per-base and bulk 2 rows as bare comma lists and did not
# escape the names, so its output could not be parsed as JSON. The members and
# their key names are unchanged; the document is now built as a Hash and
# serialised in one step so the result is always well formed.
#
# Members, in order: "Parental_1", "Parental 2", "Bulk 1", "Bulk 2",
# "Positions" (1..size), the parental and bulk 1 consensus as arrays of
# characters, "Bulk_1_coverage", "Bases_Bulk_1<base>" / "Ratios_Bulk_1<base>"
# for every base in BASES, the same block for bulk 2, and "BFR<base>" taken
# from the :first table of bfrs.
#
# Non-finite numbers (bfrs stores Float::INFINITY) have no JSON literal, so
# they are written as the strings produced by to_s, e.g. "Infinity".
#
# opts - ignored; accepted (and now optional) so the method can be called both
#        directly and with the argument JSON generators pass to to_json.
#
# Returns the JSON text as a String.
def to_json(opts = nil)
  # Replaces values JSON cannot represent as numbers by their text form.
  json_safe = lambda do |values|
    values.map { |value| value.respond_to?(:finite?) && !value.finite? ? value.to_s : value }
  end

  document = {
    "Parental_1" => @container.parental_1_name,
    "Parental 2" => @container.parental_2_name,
    "Bulk 1" => @container.bulk_1_name,
    "Bulk 2" => @container.bulk_2_name,
    # TODO: Make this for any subsection, so we can subquery in case we are working on something bigger
    "Positions" => (1..size).to_a,
    "Parental_1_consensus" => @parental_1_sequence.split(//),
    "Parental_2_consensus" => @parental_2_sequence.split(//),
    "Bulk_1_consensus" => @bulk_1_sequence.split(//),
    "Bulk_1_coverage" => json_safe.call(@coverages_1)
  }

  BASES.each do |base|
    document["Bases_Bulk_1#{base}"] = json_safe.call(base_count_for_base(base, @bases_bulk_1))
    document["Ratios_Bulk_1#{base}"] = json_safe.call(base_ratios_for_base(base, @ratios_bulk_1))
  end

  document["Bulk_2_consensus"] = @bulk_2_sequence.split(//)
  document["Bulk_2_coverage"] = json_safe.call(@coverages_2)

  BASES.each do |base|
    document["Bases_Bulk_2#{base}"] = json_safe.call(base_count_for_base(base, @bases_bulk_2))
    document["Ratios_Bulk_2#{base}"] = json_safe.call(base_ratios_for_base(base, @ratios_bulk_2))
  end

  BASES.each do |base|
    document["BFR#{base}"] = json_safe.call(bfrs[:first][base])
  end

  document.to_json
end
to_multi_fasta()
# File lib/bio/BFRTools.rb, line 265
# Renders the four consensus sequences as FASTA records.
#
# Each record is a header line ">" + to_s + ":" + sample name followed by the
# sequence on its own line, in the order parental 1, parental 2, bulk 1,
# bulk 2.
#
# Returns the records concatenated into one String.
def to_multi_fasta
  region_label = self.to_s
  records = [
    [@container.parental_1_name, @parental_1_sequence],
    [@container.parental_2_name, @parental_2_sequence],
    [@container.bulk_1_name, @bulk_1_sequence],
    [@container.bulk_2_name, @bulk_2_sequence]
  ]

  records.each_with_object(String.new) do |(sample_name, sequence), fasta|
    fasta << ">" << region_label << ":" << sample_name << "\n" << sequence << "\n"
  end
end