class Bio::Scf

Description

This class inherits from the SangerChromatogram superclass. It captures the information contained within an SCF-format chromatogram file generated by DNA sequencing. See the SangerChromatogram class for usage.

Attributes

aqual[RW]

The quality of each base at each position along the length of the sequence is captured by the nqual attributes, where n is one of a, c, g or t. Generally the quality will be high for the base that is called at a particular position and low for all the other bases. However, at positions of poor sequence quality, more than one base may have similar top scores. By analysing the nqual attributes it may be possible to determine whether the base calling was correct. The quality of the A base at each sequence position.

comments[RW]

A hash of extra information extracted from the chromatogram file

cqual[RW]

The quality of the C base at each sequence position

gqual[RW]

The quality of the G base at each sequence position

tqual[RW]

The quality of the T base at each sequence position

Public Class Methods

new(string) click to toggle source

See the SangerChromatogram class for how to create an Scf object and how to use it.

   # File lib/bio/db/sanger_chromatogram/scf.rb
# Parses the contents of an SCF chromatogram file and populates this object.
#
# The first 128 bytes are unpacked as the header; the header fields are then
# used by get_traces, get_bases_peakIndices_and_qualities and get_comments to
# read the remaining sections.
#
# string:: the complete contents of an SCF file as a String
def initialize(string)
  header = string.slice(0, 128)
  # Header layout as unpacked below: a 4-byte type tag, eight 32-bit
  # big-endian integers, the 4-byte version string, two more 32-bit integers
  # and 20 spare 32-bit words. The multiple assignment has a single target
  # for the spare words, so only the first of them is kept in @header_spare.
  @chromatogram_type, @samples, @sample_offset, @bases, @bases_left_clip, @bases_right_clip, @bases_offset, @comment_size, @comments_offset, @version, @sample_size, @code_set, @header_spare = header.unpack("a4 NNNNNNNN a4 NN N20")
  get_traces(string)
  get_bases_peakIndices_and_qualities(string)
  get_comments(string)
  # The DYEP comment supplies the dye mobility; use a placeholder when the
  # file does not carry one.
  @dye_mobility = @comments["DYEP"] || "Unknown"
end

Private Instance Methods

convert_accuracies_to_qualities() click to toggle source
    # File lib/bio/db/sanger_chromatogram/scf.rb
# Builds the quality array for the called sequence.
#
# For every character of @sequence the score is taken, at the same index,
# from the accuracy array of that base (@aqual, @cqual, @gqual or @tqual).
# Any character other than "a", "c", "g" or "t" scores 0.
#
# Returns an Array with one entry per character of @sequence.
def convert_accuracies_to_qualities
  accuracies_for = { "a" => @aqual, "c" => @cqual, "g" => @gqual, "t" => @tqual }
  @sequence.each_char.with_index.map do |base, base_pos|
    accuracies_for.key?(base) ? accuracies_for[base][base_pos] : 0
  end
end
convert_deltas_to_values(trace_read) click to toggle source
    # File lib/bio/db/sanger_chromatogram/scf.rb
# Turns a delta-encoded trace into sample values, in place.
#
# Two successive running-sum passes are made over trace_read; in each pass
# every element is replaced by the sum of itself and the (already updated)
# element before it.
#
# trace_read:: Array of numeric deltas; modified in place
#
# Returns the same Array object.
def convert_deltas_to_values(trace_read)
  2.times do
    running_total = 0
    trace_read.each_index do |idx|
      running_total = trace_read[idx] + running_total
      trace_read[idx] = running_total
    end
  end
  trace_read
end
get_bases_peakIndices_and_qualities(string) click to toggle source
    # File lib/bio/db/sanger_chromatogram/scf.rb
 # Reads the called bases, their peak positions and their quality scores.
 #
 # Populates @peak_indices, @aqual, @cqual, @gqual, @tqual, @sequence and
 # @qualities from the bases section, which starts at @bases_offset and
 # describes @bases bases. Nothing is set when @version is neither "3.00"
 # nor "2.00".
 #
 # string:: the complete contents of the SCF file
 def get_bases_peakIndices_and_qualities(string)
   if @version == "3.00"
     # Version 3 is read as consecutive arrays: peak indices (4 bytes per
     # base), then the accuracy block (4 bytes per base in total), then the
     # base characters (1 byte per base).
     offset = @bases_offset
     length = @bases * 4
     get_v3_peak_indices(string, offset, length)

     offset += length
     get_v3_accuracies(string, offset, length)

     offset += length
     get_v3_sequence(string, offset, @bases)

     # pick the accuracy of the called base at every position
     @qualities = convert_accuracies_to_qualities
   elsif @version == "2.00"
     @peak_indices = []
     @aqual = []
     @cqual = []
     @gqual = []
     @tqual = []
     @qualities = []
     # explicitly mutable buffer; bases are appended with << below
     @sequence = String.new("")

     # Version 2 is read as one 12-byte record per base: a 32-bit big-endian
     # peak index, four accuracy bytes (A, C, G, T), the base character and
     # three trailing bytes that are not used here.
     record_size = 12
     all_bases_info = string.slice(@bases_offset, @bases * record_size)

     @bases.times do |base_num|
       record = all_bases_info.slice(base_num * record_size, record_size)
       peak_index, a_acc, c_acc, g_acc, t_acc, base = record.unpack("N C C C C a C3")
       base = base.downcase

       @peak_indices << peak_index
       @aqual << a_acc
       @cqual << c_acc
       @gqual << g_acc
       @tqual << t_acc
       @sequence << base
       # the quality of a position is the accuracy of the base called there
       @qualities << case base
                     when "a" then a_acc
                     when "c" then c_acc
                     when "g" then g_acc
                     when "t" then t_acc
                     else 0
                     end
     end
   end
 end
get_comments(string) click to toggle source
    # File lib/bio/db/sanger_chromatogram/scf.rb
# Parses the comment section into the @comments hash.
#
# The section is the @comment_size bytes starting at @comments_offset. NUL
# bytes are removed and the rest is split on newlines; every line that
# contains a KEY=value pair adds one entry (KEY is a run of word characters,
# the value is the rest of the line). Lines without such a pair are skipped
# rather than being stored under a nil key.
#
# string:: the complete contents of the SCF file
def get_comments(string)
  @comments = {}
  # slice returns nil when the offset lies beyond the end of the data;
  # to_s turns that into "no comments" instead of a NoMethodError.
  comment_string = string.slice(@comments_offset, @comment_size).to_s
  comment_string.gsub(/\0/, "").split("\n").each do |comment|
    pair = /(\w+)=(.*)/.match(comment)
    next unless pair

    @comments[pair[1]] = pair[2]
  end
end
get_traces(string) click to toggle source
   # File lib/bio/db/sanger_chromatogram/scf.rb
# Reads the four trace channels into @atrace, @ctrace, @gtrace and @ttrace.
#
# The samples section starts at @sample_offset and holds @samples points per
# channel, each @sample_size bytes wide. Nothing is set when @version is
# neither "3.00" nor "2.00".
#
# string:: the complete contents of the SCF file
def get_traces(string)
  # 2-byte samples are read as big-endian unsigned shorts ("n"); anything
  # else is read as unsigned bytes ("C").
  directive = @sample_size == 2 ? "n" : "C"

  if @version == "3.00"
    # Version 3 is read as four consecutive delta-encoded arrays, in
    # A, C, G, T order.
    offset = @sample_offset
    length = @samples * @sample_size
    %w[a c g t].each do |base|
      trace_read = string.slice(offset, length).unpack("#{directive}#{@samples}")
      # values above 30000 are shifted down by 65536, i.e. treated as
      # negative deltas (this can only happen for 2-byte samples)
      trace_read.map! { |delta| delta > 30000 ? delta - 65536 : delta }
      # NOTE(review): the running sums below are not reduced modulo the
      # sample width, so 1-byte delta data (always 0..255 here) can only
      # decode to non-decreasing values - confirm against the SCF v3 format.
      trace_read = convert_deltas_to_values(trace_read)
      instance_variable_set("@#{base}trace", trace_read)
      offset += length
    end
  elsif @version == "2.00"
    @atrace = []
    @ctrace = []
    @gtrace = []
    @ttrace = []
    # Version 2 is read as interleaved samples: A, C, G, T for the first
    # point, then A, C, G, T for the second point, and so on.
    length = @samples * @sample_size * 4
    trace_read = string.slice(@sample_offset, length).unpack("#{directive}#{@samples * 4}")
    trace_read.each_slice(4) do |a_sample, c_sample, g_sample, t_sample|
      @atrace << a_sample
      @ctrace << c_sample
      @gtrace << g_sample
      @ttrace << t_sample
    end
  end
end
get_v3_accuracies(string,offset,length) click to toggle source
    # File lib/bio/db/sanger_chromatogram/scf.rb
# Reads the four accuracy arrays of a version 3 file.
#
# The length bytes starting at offset are split into four equal consecutive
# runs, which are unpacked as unsigned bytes into @aqual, @cqual, @gqual and
# @tqual, in that order.
#
# string:: the complete contents of the SCF file
# offset:: byte position where the accuracy data starts
# length:: total number of bytes covering all four arrays
def get_v3_accuracies(string, offset, length)
  accuracy_bytes = string.slice(offset, length)
  per_base = length / 4
  %w[a c g t].each_with_index do |base, run_index|
    run = accuracy_bytes.slice(run_index * per_base, per_base)
    instance_variable_set("@#{base}qual", run.unpack("C#{per_base}"))
  end
end
get_v3_peak_indices(string,offset,length) click to toggle source
    # File lib/bio/db/sanger_chromatogram/scf.rb
# Reads the peak index array of a version 3 file into @peak_indices.
#
# The length bytes starting at offset are unpacked as length/4 32-bit
# big-endian unsigned integers.
#
# string:: the complete contents of the SCF file
# offset:: byte position where the peak indices start
# length:: number of bytes occupied by the peak indices
def get_v3_peak_indices(string, offset, length)
  raw_indices = string[offset, length]
  index_count = length / 4
  @peak_indices = raw_indices.unpack("N#{index_count}")
end
get_v3_sequence(string,offset,length) click to toggle source
    # File lib/bio/db/sanger_chromatogram/scf.rb
# Reads the called bases of a version 3 file into @sequence.
#
# The length bytes starting at offset are taken as the base characters and
# stored lower-cased.
#
# string:: the complete contents of the SCF file
# offset:: byte position where the base characters start
# length:: number of bases (one byte each)
def get_v3_sequence(string, offset, length)
  raw_bases = string.slice(offset, length)
  called_bases = raw_bases.unpack("a#{length}").first
  @sequence = called_bases.downcase
end