class Bio::PolyploidTools::SNP

Attributes

chromosome[RW]
container[RW]
contig[RW]
errors[RW]
exon_list[RW]
flanking_size[RW]
gene[RW]

GENE,ORIGINAL,POS,SNP

genomes_count[RW]
hit_count[RW]
ideal_max[RW]
ideal_min[RW]
max_hits[RW]
orientation[RW]
original[RW]

GENE,ORIGINAL,POS,SNP

original_name[RW]

GENE,ORIGINAL,POS,SNP

position[RW]

GENE,ORIGINAL,POS,SNP

primer_3_min_seq_length[RW]
repetitive[RW]
snp[RW]

GENE,ORIGINAL,POS,SNP

snp_in[RW]

GENE,ORIGINAL,POS,SNP

snp_type[RW]
template_sequence[RW]
use_reference[RW]
variation_free_region[RW]

Public Class Methods

new() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 94
def initialize
  @genomes_count = 3 
  @primer_3_min_seq_length = 50
  @variation_free_region = 0
  @contig = false
  @max_hits = 8
  @exon_list = Hash.new {|hsh, key| hsh[key] = [] }
  @errors = Array.new
  @repetitive = false
  @hit_count = 0
end
parse(reg_str) click to toggle source

Format: Gene_name,Original,SNP_Pos,pos,chromosome A_comp0_c0_seq1,C,519,A,2A

# File lib/bio/PolyploidTools/SNP.rb, line 29
def self.parse(reg_str)
  reg_str.chomp!
  snp = SNP.new
  snp.gene, snp.original, snp.position, snp.snp, snp.chromosome = reg_str.split(",")
  snp.position.strip!
  snp.position =  snp.position.to_i
  snp.original.upcase!
  snp.original.strip!
  snp.snp.upcase!
  snp.snp.strip!  
  snp.chromosome.strip!
  
  snp.use_reference = false
  snp
end
parseVCF(vcf_line, chr_arm_parser: Bio::PolyploidTools::ChromosomeArm.getArmSelection("first_two") ) click to toggle source

Format: IWGSC_CSS_1AL_scaff_1455974 127 test_snp C T 135.03 .

# File lib/bio/PolyploidTools/SNP.rb, line 47
def self.parseVCF(vcf_line, chr_arm_parser: Bio::PolyploidTools::ChromosomeArm.getArmSelection("first_two") )
  snp = SNP.new
  arr = vcf_line.split("\t")
  snp.gene     = arr[2]
  snp.original = arr[3]
  snp.position = arr[1]
  snp.snp = arr[4] 
  snp.chromosome = chr_arm_parser.call(arr[0]) 
  snp.contig = arr[0]
  snp.position.strip!
  snp.position =  snp.position.to_i
  snp.original.upcase!
  snp.original.strip!
  snp.snp.upcase!
  snp.snp.strip!  
  snp.chromosome.strip! 
  snp.orientation = :forward

  info = arr[7]
  if info
    details =  info.scan(/(\w+)=([\w|.]+)/).collect { |id, value| { :id => id, :value => value }}
    details.each do |e|   
      snp.orientation = :reverse if e[:id] == "OR" and e[:value] == "reverse"
    end
  end
  return snp
end

Public Instance Methods

add_exon(exon, arm, filter_best: true) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 164
def add_exon(exon, arm, filter_best: true)
  exon_list[arm] = Array.new unless exon_list[arm]
  if filter_best and exon_list[arm].size > 0
    current = exon_list[arm].first
    exon_list[arm] = [exon] if exon.record.score > current.record.score 
  else
     exon_list[arm] << exon 
  end
end
aligned_sequences() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 563
 def aligned_sequences
  
   return @aligned_sequences if @aligned_sequences
   return Hash.new if sequences_to_align.size == 0
   
   options = ['--maxiterate', '1000', '--localpair', '--quiet']
   mafft = Bio::MAFFT.new( "mafft" , options)
   #puts "Before MAFT:#{sequences_to_align.inspect}"

   report = mafft.query_align(sequences_to_align)
   @aligned_sequences = report.alignment
#   puts "MAFFT: #{report.alignment.inspect}"
   @aligned_sequences
 end
aligned_sequences_fasta() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 578
def aligned_sequences_fasta
  ret_str = ""
  aligned_sequences.each_pair do |name, seq|
    ret_str << ">#{self.to_s}-#{name}\n#{seq}\n" 
  end
  ret_str << ">MASK #{chromosome}\n#{mask_aligned_chromosomal_snp(chromosome)}\n"

  pr = primer_region(chromosome, snp_in )
  ret_str << pr.to_fasta
  ret_str
  ret_str 
end
aligned_snp_position() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 604
def aligned_snp_position
  return @aligned_snp_position if @aligned_snp_position
  #puts self.inspect
  pos = -1
  parental_strings = Array.new
  parental_sequences.keys.each do | par |
    parental_strings << aligned_sequences[par]
  end
  $stderr.puts "WARN: #{self.to_s} #{parental_sequences.keys} is not of size 2 (#{parental_strings.size})" if parental_strings.size != 2

  local_pos_in_parental = get_snp_position_after_trim
  i = 0
  while i < parental_strings[0].size  do
    if local_pos_in_parental == 0 and parental_strings[0][i] != "-"
      pos = i
      if parental_strings[0][i] == parental_strings[1][i]
        $stderr.puts "WARN: #{self.to_s} doesn't have a SNP in the marked place (#{i})! \n#{parental_strings[0]}\n#{parental_strings[1]}"
      end 
    end
  
    local_pos_in_parental -= 1 if parental_strings[0][i] != "-"
    i += 1
  end
  @aligned_snp_position = pos
  return pos
end
chromosome_genome() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 151
def chromosome_genome
  chromosome[1]
end
chromosome_group() click to toggle source

We Only want the chromosome, we drop the arm. We don't use this any more. def chromosome= (chr)

@chromosome = chr

end

# File lib/bio/PolyploidTools/SNP.rb, line 147
def chromosome_group
  chromosome[0]
end
covered_region() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 174
def covered_region
  return @covered_region if @covered_region
  if self.use_reference
     reg = Bio::DB::Fasta::Region.new()
     reg.entry = gene
     reg.orientation = :forward
     reg.start = self.position - self.flanking_size
     reg.end = self.position + self.flanking_size
     reg.start = 1 if reg.start < 1
     return reg
  end
  
  min = @position
  max = @position
  # puts "Calculating covered region for #{self.inspect}"
  # puts "#{@exon_list.inspect}"
  # raise SNPException.new "Exons haven't been loaded for #{self.to_s}" if @exon_list.size == 0
  if @exon_list.size == 0
    min = self.position - self.flanking_size
    min = 1 if min < 1
    max =  self.position + self.flanking_size
  end
  @exon_list.each do | chromosome, exon_arr |
    exon_arr.each do | exon |
      reg = exon.query_region
      min = reg.start if reg.start < min
      max = reg.end if reg.end > max
    end
  end

  reg = Bio::DB::Fasta::Region.new()
  reg.entry = gene
  reg.orientation = :forward
  reg.start = min
  reg.end = max

  @covered_region = reg
  @covered_region
end
cut_and_pad_sequence_to_primer_region(sequence) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 541
def cut_and_pad_sequence_to_primer_region(sequence)
  ideal_min = self.local_position - flanking_size 
  ideal_max = self.local_position + flanking_size
  left_pad = 0
  right_pad=0
  if ideal_min < 0
    left_pad = ideal_min * -1
    ideal_min = 0 
  end
  if ideal_max > sequence.size
    right_pad = ideal_max - sequence.size
    ideal_max = sequence.size - 1 
  end  
  ret = "-" * left_pad  << sequence[ideal_min..ideal_max] <<  "-" * right_pad 
  ret
end
cut_sequence_to_primer_region(sequence) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 532
def cut_sequence_to_primer_region(sequence)
  ideal_min = self.local_position - flanking_size 
  ideal_max = self.local_position + flanking_size
  ideal_min = 0 if ideal_min < 0
  ideal_max = sequence.size - 1 if ideal_max > sequence.size
  # len = ideal_max - ideal_min
  sequence[ideal_min..ideal_max]
end
exon_for_chromosome(chromosome) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 470
def exon_for_chromosome (chromosome)
  selected_exon=exon_list[chromosome]
  puts "No exon with chromosome #{chromosome} for #{gene}"  unless selected_exon
  selected_exon
end
exon_sequences() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 778
def exon_sequences
  return @exon_sequences if @exon_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position
  @exon_sequences = Bio::Alignment::SequenceHash.new
  self.exon_list.each do |chromosome, exon_arr| 
    exon_arr.each do |exon|
      exon_start_offset = exon.query_region.start - gene_region.start
      exon_seq  = "-" * exon_start_offset 
      exon_seq << container.chromosome_sequence(exon.target_region).to_s
      #puts exon_seq
      #l_pos = exon_start_offset + local_pos_in_gene
      unless exon.snp_in_gap
        #puts "local position: #{local_pos_in_gene}"
        #puts "Exon_seq: #{exon_seq}"
        exon_seq[local_pos_in_gene] = exon_seq[local_pos_in_gene].upcase
        exon_seq << "-" * (gene_region.size - exon_seq.size + 1)
        #puts exon.inspect
        @exon_sequences["#{chromosome}_#{exon.query_region.start}_#{exon.record.score}"] = exon_seq
      end
    end
  end
  @exon_sequences[@chromosome] = "-" * gene_region.size unless @exon_sequences[@chromosome]
  @exon_sequences
end
get_snp_position_after_trim() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 592
def get_snp_position_after_trim
  local_pos_in_gene = self.local_position
  ideal_min = self.local_position - flanking_size 
  ideal_max = self.local_position + flanking_size
  left_pad = 0
  if ideal_min < 0
    left_pad = ideal_min * -1
    ideal_min = 0 
  end
  local_pos_in_gene - ideal_min
end
get_target_sequence(names, chromosome) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 631
def get_target_sequence(names, chromosome)
  
  best = chromosome
  best_score = 0
  names.each do |e|  
    arr = e.split("_")
    if arr.length == 3
      score = arr[2].to_f
      if score >best_score
        best_score = score 
        best = e
      end
    end
  end
  best
end
left_padding() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 214
def left_padding
  flanking_size - self.local_position + 1
  #  primer_region.start - covered_region.start
  # 0
end
local_position() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 226
    def local_position
#      puts "local_position #{self.position} #{self.covered_region.start}"
      self.position - self.covered_region.start
    end
mask_aligned_chromosomal_snp(chromosome) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 650
def mask_aligned_chromosomal_snp(chromosome)
  names = aligned_sequences.keys
  parentals =  parental_sequences.keys

  position_after_trim = get_snp_position_after_trim

  names = names - parentals
  local_pos_in_gene = aligned_snp_position

  best_target = get_target_sequence(names, chromosome)
  masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
  masked_snps = "-" * aligned_sequences.values[0].size  unless aligned_sequences[best_target]
  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
  #puts "mask_aligned_chromosomal_snp(#{chromosome})"
  #puts names
  i = 0
  for  i in 0..masked_snps.size-1
    #puts i
    different = 0
    cov = 0
    from_group = 0
    nCount = 0
    seen = []
    names.each do | chr |
      if aligned_sequences[chr] and aligned_sequences[chr][i]  != "-"
        #puts aligned_sequences[chr][i]
        cov += 1 
        nCount += 1 if aligned_sequences[chr][i] == 'N' or  aligned_sequences[chr][i] == 'n' # maybe fix this to use ambiguity codes instead.
        
        if chr[0] == chromosome_group and  not seen.include? chr[1]
          seen << chr[1]
          from_group += 1 

        end
        #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
        if chr != best_target 
          $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i].upcase
          $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless masked_snps[i].upcase
          different += 1  if masked_snps[i].upcase != aligned_sequences[chr][i].upcase 
        end
      end
    end
    masked_snps[i] = "-" if different == 0
    masked_snps[i] = "-" if cov == 1
    masked_snps[i] = "-" if nCount > 0
    masked_snps[i] = "*" if cov == 0
    expected_snps = names.size - 1

   #puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
    
    masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count
    #puts "#{i}:#{masked_snps[i]}"

    if i == local_pos_in_gene
      masked_snps[i] = "&"
      #puts "#{i}:#{masked_snps[i]}___"
      bases = ""
      names.each do | chr |
        bases << aligned_sequences[chr][i]  if aligned_sequences[chr] and aligned_sequences[chr][i]  != "-"
      end
      
      code_reference = "n"
      code_reference = Bio::NucleicAcid.to_IUAPC(bases) unless bases == ""

      if Bio::NucleicAcid.is_valid(code_reference,   original) and Bio::NucleicAcid.is_valid(code_reference,   snp)
        masked_snps[i] = ":"
      end 

    end
    #i += 1
  end
  masked_snps
end
padded_position(pos) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 231
def padded_position(pos)
  pos + left_padding
end
parental_sequences() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 476
def parental_sequences
  return @parental_sequences if @parental_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position

  @parental_sequences = Bio::Alignment::SequenceHash.new
  container.parents.each  do |name, bam|
    seq = nil
    if bam
      seq = bam.consensus_with_ambiguities({:region=>gene_region}).to_s
    else
      seq = container.gene_model_sequence(gene_region)
      unless name == self.snp_in
        seq[local_pos_in_gene] = self.original 
      end
    end
    seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
    
    seq[local_pos_in_gene] = self.snp if name == self.snp_in    
    @parental_sequences [name] = seq
  end
  @parental_sequences
end
primer_3_all_strings(target_chromosome, parental, max_specific_primers: 20 ) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 374
def primer_3_all_strings(target_chromosome, parental, max_specific_primers: 20 ) 

  pr = primer_region(target_chromosome, parental )
  primer_3_propertes = Array.new

  seq_original = String.new(pr.sequence)
  
  if seq_original.size < primer_3_min_seq_length
    errors << "The sequence (#{seq_original.size}) is shorter than #{primer_3_min_seq_length}"
    return primer_3_propertes 
  end
  
  if self.hit_count > self.max_hits
    errors << "The marker maps to #{self.hit_count} positions (max_hits: #{self.max_hits}). "
    repetitive = true
    return primer_3_propertes 
  end
  seq_original[pr.snp_pos] = self.original
  seq_original_reverse = reverse_complement_string(seq_original)

  seq_snp =  String.new(pr.sequence)
  seq_snp[pr.snp_pos] =  self.snp
  seq_snp_reverse = reverse_complement_string(seq_snp)

  rev_pos = seq_snp.size - position

  if pr.homoeologous
    @snp_type = "homoeologous"
  else
    @snp_type = "non-homoeologous"
  end
  
  total_candidates  = pr.chromosome_specific.size
  total_candidates += pr.crhomosome_specific_intron.size 
  total_candidates += pr.almost_chromosome_specific.size
  total_candidates += pr.almost_crhomosome_specific_intron.size

  skip_specific = total_candidates > max_specific_primers
  #puts "skip_specific: #{skip_specific}: #{total_candidates} > #{max_specific_primers}"
  pr.chromosome_specific.each do |pos|
    break if skip_specific
    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_specific exon #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_specific exon #{@snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)
  end

  pr.crhomosome_specific_intron.each do |pos|
    break if skip_specific
    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_specific intron #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_specific exon #{@snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)
  end

  pr.almost_chromosome_specific.each do |pos|
    break if skip_specific
    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_semispecific exon #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_semispecific exon #{@snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)
  end

  pr.almost_crhomosome_specific_intron.each do |pos|
    break if skip_specific
    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_semispecific intron #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_semispecific exon #{@snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)
  end

  args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_nonspecific all #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :sequence=>seq_original}
  primer_3_propertes << return_primer_3_string(args)
  args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_nonspecific all #{@snp_type} #{chromosome}"
  args[:sequence] = seq_snp
  primer_3_propertes << return_primer_3_string(args)
  primer_3_propertes
end
primer_3_string(target_chromosome, parental, max_specific_primers: 20) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 465
def primer_3_string(target_chromosome, parental, max_specific_primers: 20) 
  strings = primer_3_all_strings(target_chromosome, parental, max_specific_primers: max_specific_primers) 
  strings.join
end
primer_fasta_string() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 235
def primer_fasta_string
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position
  ret_str = ""

  surrounding_parental_sequences.each do |name, seq|
    ret_str << ">#{gene_region.entry}-#{self.position}_#{name}\n" 
    ret_str << "#{seq}\n"
  end

  self.surrounding_exon_sequences.each do |chromosome, exon_seq|
    ret_str << ">#{chromosome}\n#{exon_seq}\n"
  end

  mask = surrounding_masked_chromosomal_snps(chromosome)
  ret_str << ">Mask\n#{mask}\n"

  pr = primer_region(chromosome, snp_in )
  ret_str << pr.to_fasta
  ret_str
end
primer_region(target_chromosome, parental ) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 257
def primer_region(target_chromosome, parental )
  
  parental = aligned_sequences[parental].downcase
  names = aligned_sequences.keys
  target_chromosome = get_target_sequence(names, target_chromosome)
  
  chromosome_seq = aligned_sequences[target_chromosome]
  chromosome_seq = "-" * parental.size unless chromosome_seq
  chromosome_seq = chromosome_seq.downcase
  mask = mask_aligned_chromosomal_snp(target_chromosome)

  pr = PrimerRegion.new
  position_in_region = 0
  (0..parental.size-1).each do |i|

    if chromosome_seq[i] != '-' or parental[i] != '-'
      case   
      when mask[i] == '&'
        #This is the SNP we take the parental
        pr.snp_pos = position_in_region
        pr.homoeologous = false
      when mask[i] == ':'
        #This is the SNP we take the parental
        pr.snp_pos = position_in_region
        pr.homoeologous = true
      when mask[i] == '-'
        #When the mask doesnt detect a SNP, so we take the parental
        parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i])

      when /[[:upper:]]/.match(mask[i])
        #This is a good candidate for marking a SNP
        #We validate that the consensus from the sam file accepts the variation from the chromosomal sequence
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i]
          pr.chromosome_specific << position_in_region
        end
      when /[[:lower:]]/.match(mask[i])
        #this is not that good candidate, but sitll gives specificity

        if parental[i] == '-'
          parental[i] = mask[i]
          pr.almost_crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i].upcase
          pr.almost_chromosome_specific << position_in_region
        end
      end #Case closes
      position_in_region += 1
    end #Closes region with bases
  end         

  pr.sequence=parental.gsub('-','')
  pr
end
return_primer_3_string(opts={}) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 320
def return_primer_3_string(opts={})

  left = opts[:left_pos]
  right = opts[:right_pos]
  sequence =  opts[:sequence]
  extra =  opts[:extra]

  orientation = "forward"
  if opts[:right_pos]
    orientation = "forward"
    if left > right
      left = sequence.size - left - 1
      right = sequence.size - right - 1
      sequence = reverse_complement_string(sequence)
      orientation = "reverse"
    end
    if @variation_free_region > 0
      check_str = sequence[right+1, @variation_free_region]
      return nil if check_str != check_str.downcase
    end

  end

  #puts "__"
  #puts self.inspect
  str = "SEQUENCE_ID=#{opts[:name]} #{orientation} \n"
  str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless  opts[:extra_f]
  str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
  str << extra if extra
  str << opts[:extra_f] if opts[:extra_f]
  str << "SEQUENCE_TEMPLATE=#{sequence}\n"
  
 
  str << "=\n"


  #In case that we don't have a right primer, we do both orientations
  unless opts[:right_pos]
    sequence =  opts[:sequence]    
    left = sequence.size - left - 1
    orientation = "reverse"
    sequence = reverse_complement_string(sequence)
    str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
    str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless  opts[:extra_r]
    str << opts[:extra_r] if opts[:extra_r]
    str << "SEQUENCE_TEMPLATE=#{sequence}\n"
    str << extra if extra
    str << "=\n"
  end

  str
end
reverse_complement_string(sequenc_str) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 315
def reverse_complement_string(sequenc_str)
  complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
  complement.reverse!
end
right_padding() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 220
def right_padding
  ret =  (2*flanking_size) - (left_padding  + self.covered_region.size ) 
  ret = 0 if ret < 0
  ret  
end
sequences_to_align() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 558
def sequences_to_align
  @sequences_to_align = surrounding_parental_sequences.merge(surrounding_exon_sequences) unless @sequences_to_align
  @sequences_to_align
end
setTemplateFromFastaFile(fastaFile ,flanking_size: 100) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 75
def setTemplateFromFastaFile(fastaFile ,flanking_size: 100)
  reg = Bio::DB::Fasta::Region.new
  reg.entry = gene
  reg.entry = @contig if @contig
  reg.start = position - flanking_size
  reg.end = position + flanking_size + 1
  reg.orientation = :forward
  entry = fastaFile.index.region_for_entry(reg.entry)
  reg.start = 1 if reg.start < 1
  reg.end = entry.length if reg.end > entry.length
  amb = Bio::NucleicAcid.to_IUAPC("#{original}#{snp}")
  @position = @position - reg.start + 1
  @position = 1 if @position < 1
  #puts "about to fetch"
  self.template_sequence = fastaFile.fetch_sequence(reg)
  #puts "done fetching"
  template_sequence[position - 1] = amb
end
short_s() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 461
def short_s
  "#{original}#{position}#{snp}".upcase
end
snp_id_in_seq() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 137
def snp_id_in_seq
  "#{original}#{position}#{snp}"
end
surrounding_exon_sequences() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 757
def surrounding_exon_sequences
  return @surrounding_exon_sequences if @surrounding_exon_sequences
  gene_region = self.covered_region
  @surrounding_exon_sequences =  Bio::Alignment::SequenceHash.new
  self.exon_list.each do |chromosome, exon_arr| 
    exon_arr.each do |exon|
      exon_start_offset = exon.query_region.start - gene_region.start
      flanking_region  = exon.target_flanking_region_from_position(position,flanking_size)
      #TODO: Padd when the exon goes over the regions...
      #puts flanking_region.inspect
      #Ignoring when the exon is in a gap
      unless exon.snp_in_gap 
        exon_seq = container.chromosome_sequence(flanking_region)
        @surrounding_exon_sequences["#{chromosome}_#{flanking_region.start}_#{exon.record.score}"] = exon_seq
      end
    end
  end
  @surrounding_exon_sequences
end
surrounding_masked_chromosomal_snps(chromosome) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 725
def surrounding_masked_chromosomal_snps(chromosome)

  chromosomes = surrounding_exon_sequences
  names = chromosomes.keys
  get_target_sequence(names)
  masked_snps = chromosomes[chromosome].tr("-","+") if chromosomes[chromosome]
  masked_snps = "-" * (flanking_size * 2 ) unless chromosomes[chromosome]
  local_pos_in_gene = flanking_size 
  i = 0
  while i < masked_snps.size  do
    different = 0
    cov = 0
    names.each do | chr |
      if chromosomes[chr][i]  != "-" and chromosomes[chr][i]. != 'N' and chromosomes[chr][i]. != 'n'
        cov += 1 
        if chr != chromosome and masked_snps[i] != "+"
          different += 1  if masked_snps[i] != chromosomes[chr][i] 
        end
      end
    end
    masked_snps[i] = "-" if different == 0 and  masked_snps[i] != "+"
    masked_snps[i] = "-" if cov < 2
    masked_snps[i] = masked_snps[i].upcase if different > 1   

    if i == local_pos_in_gene
      masked_snps[i] = "&"
    end
    i += 1
  end
  masked_snps
end
surrounding_parental_sequences() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 503
def surrounding_parental_sequences
  return @surrounding_parental_sequences if @surrounding_parental_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position

  @surrounding_parental_sequences = Bio::Alignment::SequenceHash.new
  container.parents.each  do |name, bam|
    seq = nil
    if bam
      seq =  bam.consensus_with_ambiguities({:region=>gene_region}).to_s
    else
      seq = container.gene_model_sequence(gene_region)
      #puts "#{name} #{self.snp_in}"
      #puts "Modifing original: #{name}\n#{seq}"
      unless name == self.snp_in

        seq[local_pos_in_gene] = self.original 
      else
        seq[local_pos_in_gene] = self.snp 
      end
      #puts "#{seq}"
    end
    seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
    seq[local_pos_in_gene] = self.snp if name == self.snp_in  
    @surrounding_parental_sequences [name] = cut_and_pad_sequence_to_primer_region(seq)
  end
  @surrounding_parental_sequences
end
to_fasta() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 160
def to_fasta
    return ">#{self.gene}\n#{self.template_sequence}\n"
end
to_polymarker_coordinates(flanking_size, total:nil) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 106
def to_polymarker_coordinates(flanking_size, total:nil)
  start = position - flanking_size + 1
  start = 0 if start < 0
  total = flanking_size * 2 unless total
  total += 1
  new_position = position - start + 2
  [start , total, new_position ]
end
to_polymarker_sequence(flanking_size, total:nil) click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 115
def to_polymarker_sequence(flanking_size, total:nil)
  out = template_sequence.clone
  snp_seq = "[#{original}/#{snp}]"
  p = position-1 
  if orientation == :reverse
    p = out.length - p - 1
    s = Bio::Sequence::NA.new(out)
    s1 = Bio::Sequence::NA.new(original)
    s2 = Bio::Sequence::NA.new(snp) 
    out = s.reverse_complement
    snp_seq = "[#{s1.reverse_complement}/#{s2.reverse_complement}]"

  end

  out[p]  = snp_seq
  start = position - flanking_size - 1
  start = 0 if start < 0
  total = flanking_size * 2 unless total
  total += 5
  out[start , total ].upcase
end
to_s() click to toggle source
# File lib/bio/PolyploidTools/SNP.rb, line 457
def to_s
  "#{gene}:#{original}#{position}#{snp}#{chromosome}"
end