class Bio::Scf
Description
This class inherits from the SangerChromatogram superclass. It captures the information contained within an SCF-format chromatogram file generated by DNA sequencing. See the SangerChromatogram class for usage.
Attributes
The quality of each base at each position along the length of the sequence is captured by the nqual attributes, where n is one of a, c, g or t. Generally the quality will be high for the base that is called at a particular position and low for all the other bases. However, at positions of poor sequence quality, more than one base may have similar top scores. By analysing the nqual attributes it may be possible to determine whether the base calling was correct.
aqual: The quality of the A base at each sequence position
comments: A hash of extra information extracted from the chromatogram file
cqual: The quality of the C base at each sequence position
gqual: The quality of the G base at each sequence position
tqual: The quality of the T base at each sequence position
Public Class Methods
See the SangerChromatogram class for how to create an Scf object and for its usage.
# File lib/bio/db/sanger_chromatogram/scf.rb

# Builds an Scf object from the raw contents of an scf chromatogram file.
#
# The header is read from the first 128 bytes of +string+; the traces,
# base calls / peak indices / qualities and comments are then read by the
# private get_* helpers using the offsets found in that header.
#
# string:: the complete contents of the scf file
def initialize(string)
  header_fields = string.slice(0, 128).unpack("a4 NNNNNNNN a4 NN N20")

  # The template yields more values than there are targets: only the first
  # of the trailing N20 values is stored in @header_spare, the remainder is
  # dropped by the multiple assignment.
  @chromatogram_type, @samples, @sample_offset, @bases, @bases_left_clip,
    @bases_right_clip, @bases_offset, @comment_size, @comments_offset,
    @version, @sample_size, @code_set, @header_spare = header_fields

  get_traces(string)
  get_bases_peakIndices_and_qualities(string)
  get_comments(string)

  # NOTE(review): "Unnown" looks like a typo for "Unknown"; it is kept
  # unchanged here because callers may compare against this exact value.
  @dye_mobility = @comments["DYEP"] || "Unnown"
end
Private Instance Methods
# File lib/bio/db/sanger_chromatogram/scf.rb

# Combines the four per-base accuracy arrays into a single list holding,
# for every position of @sequence, the accuracy of the base that was
# actually called there.
#
# Reads @sequence, @aqual, @cqual, @gqual and @tqual. Only the lower-case
# letters a, c, g and t are looked up; any other character scores 0.
#
# Returns an Array with one entry per character of @sequence.
def convert_accuracies_to_qualities
  # @sequence is used throughout (the previous version mixed the instance
  # variable with the +sequence+ reader inside the same loop).
  @sequence.each_char.each_with_index.map do |base, base_pos|
    case base
    when "a" then @aqual[base_pos]
    when "c" then @cqual[base_pos]
    when "g" then @gqual[base_pos]
    when "t" then @tqual[base_pos]
    else 0
    end
  end
end
# File lib/bio/db/sanger_chromatogram/scf.rb

# Turns the delta-encoded samples in +trace_read+ back into values by
# applying a running sum over the array twice.
#
# trace_read:: Array of numbers; it is modified in place
#
# Returns the same Array object that was passed in.
def convert_deltas_to_values(trace_read)
  2.times do
    previous = 0
    trace_read.map! { |delta| previous = delta + previous }
  end
  trace_read
end
# File lib/bio/db/sanger_chromatogram/scf.rb

# Reads the base-call section of the scf data and fills @peak_indices,
# @aqual, @cqual, @gqual, @tqual, @sequence and @qualities.
#
# The layout depends on @version:
#
# "3.00":: three consecutive regions starting at @bases_offset: peak
#          indices (@bases * 4 bytes), accuracies (@bases * 4 bytes) and
#          the called bases (@bases bytes). Each region is handed to the
#          matching get_v3_* helper, then @qualities is derived with
#          convert_accuracies_to_qualities.
# "2.00":: @bases records of 12 bytes each, starting at @bases_offset.
#          Every record is unpacked with "N C C C C a C3": peak index,
#          the four accuracies (a, c, g, t), the called base, and three
#          trailing bytes that are not used here.
#
# Any other version leaves the instance variables untouched.
#
# string:: the complete contents of the scf file
def get_bases_peakIndices_and_qualities(string)
  if @version == "3.00"
    # peak index information
    peak_offset = @bases_offset
    region_length = @bases * 4
    get_v3_peak_indices(string, peak_offset, region_length)

    # accuracy information follows directly after the peak indices
    accuracy_offset = peak_offset + region_length
    get_v3_accuracies(string, accuracy_offset, region_length)

    # the called bases follow the accuracies, one byte per base
    sequence_offset = accuracy_offset + region_length
    get_v3_sequence(string, sequence_offset, @bases)

    # combine accuracies to get quality scores
    @qualities = convert_accuracies_to_qualities
  elsif @version == "2.00"
    @peak_indices = []
    @aqual = []
    @cqual = []
    @gqual = []
    @tqual = []
    @qualities = []
    # A mutable copy, so bases can be appended in place instead of
    # allocating a new string for every base.
    @sequence = "".dup

    record_size = 12
    all_bases_info = string.slice(@bases_offset, @bases * record_size)

    @bases.times do |record_number|
      record = all_bases_info.slice(record_number * record_size, record_size)
      peak_index, a_score, c_score, g_score, t_score, base = record.unpack("N C C C C a C3")
      base = base.downcase

      @peak_indices << peak_index
      @aqual << a_score
      @cqual << c_score
      @gqual << g_score
      @tqual << t_score
      @sequence << base

      # the quality of a position is the accuracy of the base called there
      @qualities << case base
                    when "a" then a_score
                    when "c" then c_score
                    when "g" then g_score
                    when "t" then t_score
                    else 0
                    end
    end
  end
end
# File lib/bio/db/sanger_chromatogram/scf.rb

# Reads the comment section of the scf data into the @comments hash.
#
# The section is the @comment_size bytes starting at @comments_offset.
# NUL bytes are removed and the rest is split into lines; every line that
# contains a "key=value" pair (key made of word characters) adds one entry
# to @comments. Lines without such a pair are ignored, so no nil key is
# ever stored. If the section lies outside +string+, @comments is left
# empty.
#
# string:: the complete contents of the scf file
def get_comments(string)
  @comments = {}
  # slice returns nil when the offset is past the end of the data
  comment_string = string.slice(@comments_offset, @comment_size).to_s
  comment_string.delete("\0").split("\n").each do |comment|
    pair = /(\w+)=(.*)/.match(comment)
    next unless pair
    @comments[pair[1]] = pair[2]
  end
end
# File lib/bio/db/sanger_chromatogram/scf.rb

# Reads the four trace arrays (@atrace, @ctrace, @gtrace, @ttrace) from
# the sample section of the scf data, which starts at @sample_offset.
#
# The layout depends on @version:
#
# "3.00":: the samples of each base are stored one base after the other
#          (a, c, g, t), @samples * @sample_size bytes per base, as deltas
#          that are decoded with convert_deltas_to_values.
# "2.00":: the samples are interleaved, four values (a, c, g, t) per
#          sample point, and are stored as plain values.
#
# Any other version leaves the trace variables untouched.
#
# string:: the complete contents of the scf file
def get_traces(string)
  if @version == "3.00"
    offset = @sample_offset
    length = @samples * @sample_size
    # 2-byte samples are read as 16-bit big-endian values, anything else
    # as single bytes
    byte = @sample_size == 2 ? "n" : "c"
    %w[a c g t].each do |base|
      trace_read = string.slice(offset, length).unpack("#{byte}#{@samples}")
      # values above 30000 are taken to be negative 16-bit deltas
      trace_read.map! { |value| value > 30000 ? value - 65536 : value }
      # For 8-bit data we need to emulate a signed/unsigned
      # cast that is implicit in the C implementations.
      if @sample_size == 1
        trace_read.map! { |value| value < 0 ? value + 256 : value }
      end
      instance_variable_set("@#{base}trace", convert_deltas_to_values(trace_read))
      offset += length
    end
  elsif @version == "2.00"
    @atrace = []
    @ctrace = []
    @gtrace = []
    @ttrace = []
    length = @samples * @sample_size * 4
    # Version 2 stores plain unsigned values, so 1-byte samples must be
    # unpacked with the unsigned "C" directive. The signed "c" directive
    # would turn samples of 128..255 into negative numbers, and unlike the
    # version 3 branch nothing corrects them afterwards.
    byte = @sample_size == 2 ? "n" : "C"
    trace_read = string.slice(@sample_offset, length).unpack("#{byte}#{@samples * 4}")
    @samples.times do |sample_num|
      first = sample_num * 4
      @atrace << trace_read[first]
      @ctrace << trace_read[first + 1]
      @gtrace << trace_read[first + 2]
      @ttrace << trace_read[first + 3]
    end
  end
end
# File lib/bio/db/sanger_chromatogram/scf.rb

# Reads the version 3 accuracy region and stores it in @aqual, @cqual,
# @gqual and @tqual.
#
# The region is split into four consecutive quarters, one per base in the
# order a, c, g, t; each quarter is unpacked as unsigned bytes.
#
# string:: the complete contents of the scf file
# offset:: position of the accuracy region within +string+
# length:: total size of the region in bytes (all four bases together)
def get_v3_accuracies(string, offset, length)
  accuracy_region = string.slice(offset, length)
  per_base = length / 4
  %w[a c g t].each_with_index do |base, quarter|
    scores = accuracy_region.slice(quarter * per_base, per_base).unpack("C#{per_base}")
    instance_variable_set("@#{base}qual", scores)
  end
end
# File lib/bio/db/sanger_chromatogram/scf.rb

# Reads the version 3 peak index region into @peak_indices.
#
# The region is unpacked as length / 4 values of four bytes each
# ("N": 32-bit unsigned, big-endian).
#
# string:: the complete contents of the scf file
# offset:: position of the peak index region within +string+
# length:: size of the region in bytes
#
# Returns the Array assigned to @peak_indices.
def get_v3_peak_indices(string, offset, length)
  packed_indices = string.slice(offset, length)
  index_count = length / 4
  @peak_indices = packed_indices.unpack("N#{index_count}")
end
# File lib/bio/db/sanger_chromatogram/scf.rb

# Reads the version 3 called-base region into @sequence, converted to
# lower case.
#
# string:: the complete contents of the scf file
# offset:: position of the base region within +string+
# length:: number of bytes (one per base) to read
#
# Returns the String assigned to @sequence.
def get_v3_sequence(string, offset, length)
  called_bases = string.slice(offset, length).unpack("a#{length}")
  @sequence = called_bases.join('').downcase
end