class Bio::PolyploidTools::NoSNPSequence

Attributes

sequence_original[RW]

Public Class Methods

parse(reg_str) click to toggle source

Format: snp name,chromsome from contig,microarray sequence BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTAATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA

# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 14
def self.parse(reg_str)
  reg_str.chomp!
  snp = NoSNPSequence.new

  arr = reg_str.split(",")
  
  if arr.size == 3
    snp.gene, snp.chromosome, snp.sequence_original = reg_str.split(",")
  elsif arr.size == 2
   snp.gene, snp.sequence_original = arr
 else
   throw SNPSequenceException.new "Need two or three fields to parse, and got #{arr.size} in #{reg_str}"
  end
  #snp.position = snp.position.to_i
  #snp.original.upcase!
  #snp.snp.upcase!
  snp.chromosome. strip!
  snp.snp_in = snp.chromosome
  snp.parse_sequence_snp
  snp.exon_list = Hash.new()
  snp
end

Public Instance Methods

aligned_sequences() click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 271
 def aligned_sequences
  
   return @aligned_sequences if @aligned_sequences
   if sequences_to_align.size <= 1
     @aligned_sequences = sequences_to_align
     return @aligned_sequences
   end
   options = ['--maxiterate', '1000', '--localpair', '--quiet']
   mafft = Bio::MAFFT.new( "mafft" , options)
 #  puts "Before MAFT:#{sequences_to_align.inspect}"
   report = mafft.query_align(sequences_to_align)
   @aligned_sequences = report.alignment
#   puts "MAFFT: #{report.alignment.inspect}"
   @aligned_sequences
 end
count_deletions_around(position,target_chromosome) click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 99
def count_deletions_around(position,target_chromosome)
  first_aligned = aligned_sequences[target_chromosome]

  pos_start = position - flanking_size
  pos_end = position + flanking_size
  pos_start = 0 if pos_start < 0
  pos_end = first_aligned.size - 1 if pos_end >= first_aligned.size
  count = 0
  for i in pos_start..pos_end
    has_del = false

    aligned_sequences.each_pair do |name, val|  
      has_del = true if val[i] == '-'
      #print "#{val[i]}\t"
    end
    count += 1 if has_del
    #print "#{count}\n"
  end
  return count
end
get_base_in_different_chromosome(position, target_chromosome) click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 217
def get_base_in_different_chromosome(position, target_chromosome)

    aligned_sequences.each_pair do |name, val|  
      next if target_chromosome == name
      return val[position]
    end
end
mask_aligned_chromosomal_snp(chromosome) click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 56
 def mask_aligned_chromosomal_snp(chromosome)
  return nil if  aligned_sequences.values.size == 0
  names = aligned_sequences.keys
  parentals =  parental_sequences.keys
  names = names - parentals


  best_target = get_target_sequence(names, chromosome)
  masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
  masked_snps = "-" * aligned_sequences.values[0].size  unless aligned_sequences[best_target]

  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
  i = 0
  while i < masked_snps.size
    different = 0
    cov = 0
    from_group = 0
    names.each do | chr |
      if aligned_sequences[chr] and aligned_sequences[chr][i]  != "-"
        cov += 1 

        from_group += 1 if chr[0] == chromosome_group
        #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
        if chr != chromosome 
          $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i].upcase
          $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless masked_snps[i].upcase
          different += 1  if masked_snps[i].upcase != aligned_sequences[chr][i].upcase 
        end
      end
    end
    masked_snps[i] = "-" if different == 0
    masked_snps[i] = "-" if cov == 1
    masked_snps[i] = "*" if cov == 0
    expected_snps = names.size - 1
   #puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
    
    masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count

    i += 1
  end
  masked_snps
end
parse_sequence_snp() click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 41
def parse_sequence_snp
   @position = (sequence_original.length / 2).to_i 
   @original = sequence_original[@position]
   @snp = @original
end
parse_snp() click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 37
def parse_snp
   
end
primer_3_all_strings(target_chromosome, parental, max_specific_primers: nil) click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 225
def primer_3_all_strings(target_chromosome, parental, max_specific_primers: nil) 
  #puts "primer_3_all_strings: #{target_chromosome} #{parental}"
  pr = primer_region(target_chromosome, parental )
  #puts pr.inspect
  primer_3_propertes = Array.new

  seq_original = String.new(pr.sequence)
  #puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
  return primer_3_propertes if seq_original.size < primer_3_min_seq_length

  if pr.homoeologous
    snp_type = "homoeologous"
  else
    snp_type = "non-homoeologous"
  end

  pr.chromosome_specific.each_with_index do |pos , i|
    seq_snp =  seq_original.clone
    #original_base = seq_snp[pos]
    #puts "___"
    #puts aligned_sequences.keys.inspect
    #puts target_chromosome
    t_chr =  get_target_sequence(aligned_sequences.keys, target_chromosome)
    other_chromosome_base = get_base_in_different_chromosome(pr.chromosome_specific_in_mask[i], t_chr)

    args = {
      :name =>"#{gene} A chromosome_specific exon #{snp_type} #{chromosome}", 
      :left_pos => pos,  
      :sequence=>seq_snp
    }
    
    seq_snp =  seq_original.clone
    primer_3_propertes << return_primer_3_string(args)
    
    args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
    seq_snp[pos] =  other_chromosome_base.upcase
    args[:sequence] = seq_snp
    
    
    primer_3_propertes << return_primer_3_string(args)
  end

  
  primer_3_propertes
end
primer_region(target_chromosome, parental_chr ) click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 120
def primer_region(target_chromosome, parental_chr )
  chromosome_seq = aligned_sequences[target_chromosome]
  names = aligned_sequences.keys
  target_chromosome = get_target_sequence(names, target_chromosome)
  chromosome_seq = aligned_sequences[target_chromosome]
  chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
  chromosome_seq = "-" * sequence_original.size unless chromosome_seq
  chromosome_seq = chromosome_seq.downcase
  #puts chromosome_seq
  mask = mask_aligned_chromosomal_snp(target_chromosome)

  pr = PrimerRegion.new
  pr.homoeologous = false
  position_in_region = 0
  parental = chromosome_seq.clone
  (0..chromosome_seq.size-1).each do |i|

    if chromosome_seq[i] != '-'
      case   
      when mask[i] == '-'
        #When the mask doesnt detect a SNP, so we take the parental
        parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i])
      when /[[:upper:]]/.match(mask[i])
        #This is a good candidate for marking a SNP
        #We validate that the consensus from the sam file accepts the variation from the chromosomal sequence
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i]
          pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
          pr.chromosome_specific_in_mask << i
        end

      when /[[:lower:]]/.match(mask[i])
        #this is not that good candidate, but sitll gives specificity
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.almost_crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i].upcase
          pr.almost_chromosome_specific << position_in_region
          pr.almost_chromosome_specific_in_mask << i
        end
      end #Case closes
      pr.position_in_mask_from_template[position_in_region] = i
      position_in_region += 1
    end #Closes region with bases
  end         
  pr.sequence=parental.gsub('-','')
  pr
end
return_primer_3_string(opts={}) click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 173
def return_primer_3_string(opts={})
  #puts "return_primer_3_string #{opts.inspect}"
  left = opts[:left_pos]
  right = opts[:right_pos]
  sequence =  opts[:sequence].clone
  orientation = "forward"
  if opts[:right_pos]
    orientation = "forward"
    if left > right
      left = sequence.size - left - 1
      right = sequence.size - right - 1
      sequence = reverse_complement_string(sequence)
      orientation = "reverse"
    end
    if @variation_free_region > 0
      check_str = sequence[right+1, @variation_free_region]
      return nil if check_str != check_str.downcase
    end

  end


  str = "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
  str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
  str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
  str << "SEQUENCE_TEMPLATE=#{sequence}\n"
  str << "=\n"


  #In case that we don't have a right primer, we do both orientations
  unless opts[:right_pos]
    sequence =  opts[:sequence].clone    
    left = sequence.size - left - 1
    orientation = "reverse"
    sequence = reverse_complement_string(sequence)
    str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
    str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
    str << "SEQUENCE_TEMPLATE=#{sequence}\n"
    str << "=\n"
  end

  str
end
sequences_to_align() click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 51
def sequences_to_align
  @sequences_to_align = surrounding_exon_sequences unless @sequences_to_align
  @sequences_to_align
end
to_s() click to toggle source
# File lib/bio/PolyploidTools/NoSNPSequence.rb, line 47
def to_s
  "#{gene}:#{chromosome}"
end