class Bio::Abif

Description

This class inherits from the SangerChromatogram superclass. It captures the information contained within an ABIF format chromatogram file generated by DNA sequencing. See the SangerChromatogram class for usage.

Constants

DATA_TYPES
PACK_TYPES

Attributes

chemistry[RW]

The chemistry used when sequencing, e.g. Dye terminators => 'term.' (String)

sample_title[RW]

The sample title as entered when sequencing the sample (String)

Public Class Methods

new(string) click to toggle source

See the SangerChromatogram class for how to create an Abif object and how to use it.

   # File lib/bio/db/sanger_chromatogram/abif.rb
# Builds an Abif object from +string+, the contents of an ABIF chromatogram.
#
# The header fields are unpacked from string.slice(0, 128), the directory
# entries are loaded through get_directory_entries, and the sequence, peak
# indices, qualities, sample title, dye mobility, chemistry and the four
# per-base traces are then copied out of those entries.
#
# Arguments:
# * (required) _string_: (String) contents of the ABIF file
def initialize(string)
  # header fields, unpacked in the order they are laid out in the file
  @chromatogram_type, @version,
    @directory_tag_name, @directory_tag_number,
    @directory_element_type, @directory_element_size,
    @directory_number_of_elements, @directory_data_size,
    @directory_data_offset, @directory_data_handle =
    string.slice(0, 128).unpack("a4 n a4 N n n N N N N")
  # the version is stored as an integer and exposed divided by 100
  @version = @version / 100.0

  get_directory_entries(string)
  entries = @directory_entries

  # base calls arrive as character codes; store them as a lower-case string
  @sequence = entries["PBAS"][1].data.map { |code| code.chr.downcase }.join("")
  @peak_indices = entries["PLOC"][1].data
  @qualities = entries["PCON"][1].data
  @sample_title = entries["SMPL"][1].data

  # use tag 2 of PDMF when the entry list is long enough to hold it, else tag 1
  mobility_entries = entries["PDMF"]
  mobility_tag = mobility_entries.size > 2 ? 2 : 1
  @dye_mobility = mobility_entries[mobility_tag].data

  @chemistry = entries["phCH"][1].data

  # FWO_ gives the base letter for each of DATA tags 9..12, in order; each
  # trace is stored in the instance variable named "@<base>trace"
  base_order = entries["FWO_"][1].data.map { |code| code.chr.downcase }
  4.times do |position|
    instance_variable_set("@#{base_order[position]}trace", entries["DATA"][9 + position].data)
  end
end

Public Instance Methods

data(name, tag_number = 1) click to toggle source

Returns the data for the name. If not found, returns nil.


Arguments:

  • (required) name: (String) name of the data

  • (optional) tag_number: (Integer) tag number (default 1)

Returns

any data type or nil

   # File lib/bio/db/sanger_chromatogram/abif.rb
# Returns the data of the directory entry stored under +name+ and
# +tag_number+.
#
# Arguments:
# * (required) _name_: (String) name of the data
# * (optional) _tag_number_: (Integer) tag number (default 1)
# Returns:: the entry's data, or nil when there is no entry for the given
#           name or no entry for the given tag number under that name
def data(name, tag_number = 1)
  entries = @directory_entries[name]
  # a known name may still lack the requested tag number; treat that as
  # "not found" instead of calling #data on nil
  entry = entries && entries[tag_number]
  entry ? entry.data : nil
end

Private Instance Methods

get_directory_entries(string) click to toggle source
   # File lib/bio/db/sanger_chromatogram/abif.rb
# Reads every directory entry out of +string+ into @directory_entries, a
# Hash mapping an entry name to an Array of entries indexed by tag number.
#
# Entries are read consecutively starting at @directory_data_offset, each
# occupying @directory_element_size characters, @directory_number_of_elements
# times in total.
#
# Arguments:
# * (required) _string_: (String) contents of the ABIF file
def get_directory_entries(string)
  @directory_entries = {}
  position = @directory_data_offset
  @directory_number_of_elements.times do
    entry = DirectoryEntry.new
    record = string.slice(position, @directory_element_size)
    entry.name, entry.tag_number, entry.element_type, entry.element_size,
      entry.number_of_elements, entry.data_size, entry.data_offset =
      record.unpack("a4 N n n N N N")

    # data larger than 4 is looked up in the whole file; anything smaller is
    # read from the directory record itself
    data_source = entry.data_size > 4 ? string : record
    get_entry_data(entry, data_source)

    # file the entry under its name, at the index given by its tag number
    (@directory_entries[entry.name] ||= [])[entry.tag_number] = entry
    position += @directory_element_size
  end
end
get_entry_data(entry, string) click to toggle source
    # File lib/bio/db/sanger_chromatogram/abif.rb
 # Decodes the data belonging to +entry+ and stores it in entry.data.
 #
 # Arguments:
 # * (required) _entry_: directory entry whose element_type, data_size,
 #   data_offset and number_of_elements have already been set
 # * (required) _string_: (String) the whole file when entry.data_size > 4,
 #   otherwise the directory record of the entry itself
 #
 # entry.data becomes the Array produced by String#unpack for element types
 # that have a pack format, and the raw undecoded String for user defined
 # types (element_type > 1023) and for types with no known pack format.
 def get_entry_data(entry, string)
   raw_data =
     if entry.data_size > 4
       string.slice(entry.data_offset, entry.data_size)
     else
       # small data is stored inline in the directory record, in the 4
       # characters at offset 20 that otherwise hold the data offset
       string.slice(20, 4)
     end

   # user defined data (element_type > 1023) is not processed as yet by this
   # bioruby module
   pack_type = entry.element_type > 1023 ? nil : PACK_TYPES[DATA_TYPES[entry.element_type]]

   if pack_type.nil?
     # either user defined data or an element type with no pack format:
     # keep the raw data rather than failing on a nil pack format
     entry.data = raw_data
     return
   end

   # formats that already end in "*" consume all the data; the others need an
   # explicit element count
   unpack_string = pack_type.include?("*") ? pack_type : "#{pack_type}#{entry.number_of_elements}"
   entry.data = raw_data.unpack(unpack_string)

   # pascal string: the first byte is a character count and is therefore removed
   entry.data.shift if pack_type == "CA*"
 end