class Bio::PolyploidTools::ExonContainer

Constants

BASES

Attributes

chromosomes[R]
flanking_size[RW]
gene_models_db[R]
max_hits[RW]
parental_1_name[R]
parental_1_sam[R]
parental_2_name[R]
parental_2_sam[R]
parents[R]
primer_3_min_seq_length[RW]
snp_map[R]

Public Class Methods

new() click to toggle source

Sets the reference file for the gene models

# File lib/bio/PolyploidTools/ExonContainer.rb, line 13
def initialize
  @parents=Hash.new
  @snp_map = Hash.new 
  @primer_3_min_seq_length = 50
  @max_hits = 10
end

Public Instance Methods

add_alignments(opts=Hash.new) click to toggle source
# File lib/bio/PolyploidTools/ExonContainer.rb, line 181
def add_alignments(opts=Hash.new) 
  opts = { :min_identity=>90, filter_best:false }.merge!(opts)
  exonerate_filename = opts[:exonerate_file]
  arm_selection = opts[:arm_selection]
  filter_best = opts[:filter_best]

  unless arm_selection
    arm_selection = lambda do | contig_name |
      ret = contig_name[0,3]       
      return ret
    end
  end


  File.open(exonerate_filename) do |f|
    f.each_line do | line |
      record = Bio::DB::Exonerate::Alignment.parse_custom(line)
      if  record and record.identity >= opts[:min_identity]
        snp_array = @snp_map[record.query_id]
        if snp_array != nil
          snp_array.each do |snp|                            
            if snp != nil and snp.position.between?( (record.query_start + 1) , record.query_end)
              begin
                exon = record.exon_on_gene_position(snp.position)
                snp.add_exon(exon, arm_selection.call(record.target_id), filter_best:filter_best)
              rescue Bio::DB::Exonerate::ExonerateException
                $stderr.puts "Failed for the range #{record.query_start}-#{record.query_end} for position #{snp.position}"
              end
            end
          end
        end
      end
    end
  end
  remove_alignments_over_max_hits
end
add_chromosome_arm(opts) click to toggle source
# File lib/bio/PolyploidTools/ExonContainer.rb, line 70
def add_chromosome_arm(opts)
  @chromosomes = Hash.new unless @chromosomes
  name = opts[:name]
  path = opts[:reference_path]
  path = opts[:alig_path]
  chromosomes[name] = Bio::DB::Fasta::FastaFile.new(fasta: path)
end
add_parental(opts=Hash.new) click to toggle source
# File lib/bio/PolyploidTools/ExonContainer.rb, line 232
def add_parental(opts=Hash.new) 
  # opts = { :name=>opts[:path]}.merge!(opts)
  sam = nil
  name = opts[:name] ? opts[:name] : "Unknown"
  if opts[:path] 
    path = opts[:path]
    name = opts[:name] ? opts[:name] : path.basename(".bam")
    sam =  Bio::DB::Sam.new({:fasta=>@gene_models_path, :bam=>opts[:path]})
  end
  @parents[name] = sam
end
add_snp(snp) click to toggle source
# File lib/bio/PolyploidTools/ExonContainer.rb, line 78
def add_snp(snp)
  snp.max_hits = self.max_hits
  @snp_map[snp.gene] = Array.new unless   @snp_map[snp.gene] 
  @snp_map[snp.gene] << snp

end
add_snp_file(filename, chromosome, snp_in, original_name) click to toggle source
# File lib/bio/PolyploidTools/ExonContainer.rb, line 85
def add_snp_file(filename, chromosome, snp_in, original_name)

  File.open(filename) do | f |
    f.each_line do | line |
      snp = SNP.parse(line)
      snp.flanking_size = flanking_size
      if snp.position > 0
        snp.container = self
        snp.chromosome = chromosome
        snp.snp_in = snp_in
        snp.original_name = original_name
        @snp_map[snp.gene] = Array.new unless   @snp_map[snp.gene] 
        @snp_map[snp.gene] << snp   
      end

    end
  end
end
chromosome_sequence(region) click to toggle source

Retunrs the sequence for a region in the gene models (exon)

# File lib/bio/PolyploidTools/ExonContainer.rb, line 56
def chromosome_sequence(region)
  left_pad = 0
  #TODO: Padd if it goes to the right
  if(region.start < 1)
    left_pad = region.start * -1
    left_pad += 1
    region.start = 1
  end
  str = "-" * left_pad << @chromosomes_db.fetch_sequence(region)
  #str << "n" * (region.size - str.size + 1) if region.size > str.size
  str
end
fasta_string_for_snp(snp) click to toggle source
# File lib/bio/PolyploidTools/ExonContainer.rb, line 106
def fasta_string_for_snp(snp)
  gene_region = snp.covered_region
  local_pos_in_gene = snp.local_position
  ret_str = ""
  @parents.each  do |name, bam|
    ret_str << ">#{gene_region.id}_SNP-#{snp.position}_#{name} Overlapping_exons:#{gene_region.to_s} localSNPpo:#{local_pos_in_gene+1}\n" 
    to_print =  bam.consensus_with_ambiguities(region: gene_region).to_s
    to_print[local_pos_in_gene] = to_print[local_pos_in_gene].upcase
    ret_str << to_print << "\n"
  end

  snp.exon_list.each do | chromosome,  exon |
    target_region = exon.target_region
    exon_start_offset = exon.query_region.start - gene_region.start
    chr_local_pos=local_pos_in_gene + target_region.start + 1
    ret_str  << ">#{chromosome}_SNP-#{chr_local_pos} #{exon.to_s} #{target_region.orientation}\n"
    to_print =  "-" * exon_start_offset 
    chr_seq  =  chromosome_sequence(exon.target_region).to_s
    l_pos    =  exon_start_offset + local_pos_in_gene
    to_print << chr_seq
    to_print[local_pos_in_gene] = to_print[local_pos_in_gene].upcase
    ret_str  << to_print
  end
  ret_str
end
gene_model_sequence(region) click to toggle source

Returns the sequence for a region in the gene models (exon)

# File lib/bio/PolyploidTools/ExonContainer.rb, line 27
def gene_model_sequence(region)
  #puts "Region: "
  #puts region.inspect
  target_reg = @gene_models_db.index.region_for_entry(region.entry)
  #puts target_reg.inspect
  region.end = target_reg.length if region.end > target_reg.length
  #entries[region.entry]

  seq=@gene_models_db.fetch_sequence(region)
  #puts "sequence: "
  #This is a patch that we need to fix in biosamtools:
  #puts seq
  index = seq.index('>')
  if(index )
    index -= 1
    #puts "Index: #{index}"
    seq = seq.slice(0..index) 
  end
  #puts seq
  seq
end
gene_models(path) click to toggle source
# File lib/bio/PolyploidTools/ExonContainer.rb, line 20
def gene_models(path)
  @gene_models_db = Bio::DB::Fasta::FastaFile.new(fasta: path)
  @gene_models_db.index
  @gene_models_path = path
end
print_fasta_snp_exones(file) click to toggle source
print_primer_3_exons(file, target_chromosome , parental, max_specific_primers: 20 ) click to toggle source
remove_alignments_over_max_hits() click to toggle source
# File lib/bio/PolyploidTools/ExonContainer.rb, line 218
def remove_alignments_over_max_hits
  @snp_map.each_pair do | gene, snp_array| 
    snp_array.each do |snp|
      total_hits = snp.exon_list.map {|e| e[1].size}.reduce(0,:+)
      snp.hit_count = total_hits
      if total_hits > max_hits
        snp.exon_list = {} 
        snp.repetitive = true
        snp.errors << "The marker is in a repetitive region (#{total_hits} hits to reference)"
      end
    end
  end
end