class Ensembl::Core::Slice
From the perl API tutorial (www.ensembl.org/info/software/core/core_tutorial.html): “A Slice
object represents a continuous region of a genome. Slices can be used to obtain sequence, features or other information from a particular region of interest.”
In contrast to almost all other classes of Ensembl::Core
, the Slice
class is not based on ActiveRecord
.
@example
chr4 = SeqRegion.find_by_name('4') my_slice = Slice.new(chr4, 95000, 98000, -1) puts my_slice.display_name #--> 'chromosome:4:Btau_3.1:95000:98000:1'
Attributes
Public Class Methods
Create an array of all Slices for a given coordinate system.
@example
slices = Slice.fetch_all('chromosome')
@param [String] coord_system_name Name of coordinate system @param [String] species Name of species @param [Integer] version Version of coordinate system @return [Array<Slice>] Array of Slice
objects
# File lib/bio-ensembl/core/slice.rb, line 152 def self.fetch_all(coord_system_name = 'chromosome',species = Ensembl::SESSION.collection_species ,version = nil) answer = Array.new coord_system = nil if Collection.check species = species.downcase species_id = Collection.get_species_id(species) raise ArgumentError, "No specie found in the database with this name: #{species}" if species_id.nil? if version.nil? coord_system = Ensembl::Core::CoordSystem.find_by_name_and_species_id(coord_system_name,species_id) else coord_system = Ensembl::Core::CoordSystem.find_by_name_and_species_id_and_version(coord_system_name, species_id, version) end else if version.nil? coord_system = Ensembl::Core::CoordSystem.find_by_name(coord_system_name) else coord_system = Ensembl::Core::CoordSystem.find_by_name_and_version(coord_system_name, version) end end coord_system.seq_regions.each do |seq_region| answer.push(Ensembl::Core::Slice.new(seq_region)) end return answer end
Create a Slice
based on a Gene
@example
my_slice = Slice.fetch_by_gene_stable_id('ENSG00000184895')
@param [String] gene_stable_id Ensembl
gene stable ID @param [Integer] flanking_seq_length Length of the flanking sequence @return [Slice] Slice
object
# File lib/bio-ensembl/core/slice.rb, line 119 def self.fetch_by_gene_stable_id(gene_stable_id, flanking_seq_length = 0) gene_stable_id = Ensembl::Core::GeneStableId.find_by_stable_id(gene_stable_id) gene = gene_stable_id.gene seq_region = gene.seq_region return Ensembl::Core::Slice.new(seq_region, gene.seq_region_start - flanking_seq_length, gene.seq_region_end + flanking_seq_length, gene.seq_region_strand) end
Create a Slice
without first creating the SeqRegion
object.
@example
my_slice_1 = Slice.fetch_by_region('chromosome','4',95000,98000,1)
@param [String] coord_system_name Name of coordinate system @param [String] seq_region_name name of the seq_region
@param [Integer] start Start position of the slice on the seq_region
@param [Integer] stop Stop position of the slice on the seq_region
@param [Integer] strand Strand that the slice should be @param [String] species Name of species in case of multi-species database @param [Integer] version Version number of the coordinate system @return [Slice] Slice
object
# File lib/bio-ensembl/core/slice.rb, line 74 def self.fetch_by_region(coord_system_name, seq_region_name, start = nil, stop = nil, strand = 1, species = Ensembl::SESSION.collection_species ,version = nil) all_coord_systems = nil if Collection.check species = species.downcase if species.nil? raise ArgumentError, "When using multi-species db, you must pass a species name to get the correct Slice" else species_id = Collection.get_species_id(species) raise ArgumentError, "No species found in the database with this name: #{species}" if species_id.nil? all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name_and_species_id(coord_system_name,species_id) end else all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name(coord_system_name) end coord_system = nil if version.nil? # Take the version with the lower rank coord_system = all_coord_systems.sort_by{|cs| cs.rank}.shift else coord_system = all_coord_systems.select{|cs| cs.version == version}[0] end unless coord_system.class == Ensembl::Core::CoordSystem message = "Couldn't find a Ensembl::Core::CoordSystem object with name '" + coord_system_name + "'" if ! version.nil? message += " and version '" + version + "'" end raise message end seq_region = Ensembl::Core::SeqRegion.find_by_name_and_coord_system_id(seq_region_name, coord_system.id) #seq_region = Ensembl::Core::SeqRegion.find_by_sql("SELECT * FROM seq_region WHERE name = '" + seq_region_name + "' AND coord_system_id = " + coord_system.id.to_s)[0] unless seq_region.class == Ensembl::Core::SeqRegion raise "Couldn't find a Ensembl::Core::SeqRegion object with the name '" + seq_region_name + "'" end return Ensembl::Core::Slice.new(seq_region, start, stop, strand) end
Create a Slice
based on a Transcript
@example
my_slice = Slice.fetch_by_transcript_stable_id('ENST00000383673')
@param [String] transcript_stable_id Ensembl
transcript stable ID @param [Integer] flanking_seq_length Length of the flanking sequence @return [Slice] Slice
object
# File lib/bio-ensembl/core/slice.rb, line 135 def self.fetch_by_transcript_stable_id(transcript_stable_id, flanking_seq_length = 0) transcript_stable_id = Ensembl::Core::TranscriptStableId.find_by_stable_id(transcript_stable_id) transcript = transcript_stable_id.transcript seq_region = transcript.seq_region return Ensembl::Core::Slice.new(seq_region, transcript.seq_region_start - flanking_seq_length, transcript.seq_region_end + flanking_seq_length, transcript.seq_region_strand) end
Create a new Slice
object from scratch.
@example
chr4 = SeqRegion.find_by_name('4') my_slice = Slice.new(chr4, 95000, 98000, -1)
@param [SeqRegion] seq_region
SeqRegion
object @param [Integer] start Start position of the slice on the seq_region
@param [Integer] stop Stop position of the slice on the seq_region
@param [Integer] strand Strand that the slice should be @return [Slice] Slice
object
# File lib/bio-ensembl/core/slice.rb, line 47 def initialize(seq_region, start = 1, stop = seq_region.length, strand = 1) if start.nil? start = 1 end if stop.nil? stop = seq_region.length end unless seq_region.class == Ensembl::Core::SeqRegion raise 'First argument has to be a Ensembl::Core::SeqRegion object' end @seq_region, @start, @stop, @strand = seq_region, start, stop, strand @seq = nil end
Public Instance Methods
The display_name
method returns a full name of this slice, containing the name of the coordinate system, the sequence region, start and stop positions on that sequence region and the strand. E.g. for a slice of bovine chromosome 4 from position 95000 to 98000 on the reverse strand, the display_name
would look like: chromosome:4:Btau_3.1:95000:98000:-1
@example
puts my_slice.display_name
@return [String] Nicely formatted name of the Slice
# File lib/bio-ensembl/core/slice.rb, line 203 def display_name return [self.seq_region.coord_system.name, self.seq_region.coord_system.version, self.seq_region.name, self.start.to_s, self.stop.to_s, self.strand.to_s].join(':') end
Get all DnaAlignFeatures that are located on a Slice
for a given Analysis
.
Pitfall: just looks at the CoordSystem
that the Slice
is located on. For example, if a Slice
is located on a SeqRegion
on the 'chromosome' CoordSystem
, but all dna_align_features
are annotated on SeqRegions of the 'scaffold' CoordSystem
, this method will return an empty array.
@example
my_slice.dna_align_features('Vertrna').each do |feature| puts feature.to_yaml end
@param [String] analysis_name Name of analysis @return [Array<DnaAlignFeature>] Array of DnaAlignFeature
objects
# File lib/bio-ensembl/core/slice.rb, line 557 def dna_align_features(analysis_name = nil) if analysis_name.nil? return DnaAlignFeature.find_by_sql('SELECT * FROM dna_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s) else analysis = Analysis.find_by_logic_name(analysis_name) return DnaAlignFeature.find_by_sql('SELECT * FROM dna_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s + ' AND analysis_id = ' + analysis.id.to_s) end end
The Slice#excise
method removes a bit of a slice and returns the remainder as separate slices.
@example
original_slice = Slice.fetch_by_region('chromosome','X',1,10000) new_slices = original_slice.excise([500..750, 1050..1075]) new_slices.each do |s| puts s.display_name end # result: # chromosome:X:1:499:1 # chromosome:X:751:1049:1 # chromosome:X:1076:10000:1
@param [Array<Range>] Array of ranges to excise @return [Array<Slice>] Array of slices
# File lib/bio-ensembl/core/slice.rb, line 285 def excise(ranges) if ranges.class != Array raise RuntimeError, "Argument should be an array of ranges" end ranges.each do |r| if r.class != Range raise RuntimeError, "Argument should be an array of ranges" end end answer = Array.new previous_excised_stop = self.start - 1 ranges.sort_by{|r| r.first}.each do |r| subslice_start = previous_excised_stop + 1 if subslice_start <= r.first - 1 answer.push(Slice.new(self.seq_region, subslice_start, r.first - 1)) end previous_excised_stop = r.last if r.last > self.stop return answer end end subslice_start = previous_excised_stop + 1 answer.push(Slice.new(self.seq_region, subslice_start, self.stop)) return answer end
# File lib/bio-ensembl/core/slice.rb, line 607 def get_genotyped_variation_features variation_connection() Ensembl::Variation::VariationFeature.find(:all,:conditions => ["flags = 'genotyped' AND seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop]) end
Don't use this method yourself.
# File lib/bio-ensembl/core/slice.rb, line 437 def get_objects(target_class, table_name, inclusive = false) answer = Array.new coord_system_ids_with_features = nil # Get all the coord_systems with this type of features on them if Collection.check coord_system_ids_with_features = Collection.find_all_coord_by_table_name(table_name,self.seq_region.coord_system.species_id).collect{|mc| mc.coord_system_id} else coord_system_ids_with_features = MetaCoord.find_all_by_table_name(table_name).collect{|mc| mc.coord_system_id} end # Get the features of the original slice if coord_system_ids_with_features.include?(self.seq_region.coord_system_id) sql = '' if inclusive sql = <<SQL SELECT * FROM #{table_name} WHERE seq_region_id = #{self.seq_region.id.to_s} AND (( seq_region_start BETWEEN #{self.start.to_s} AND #{self.stop.to_s} ) OR ( seq_region_end BETWEEN #{self.start.to_s} AND #{self.stop.to_s} ) OR ( seq_region_start <= #{self.start.to_s} AND seq_region_end >= #{self.stop.to_s} ) ) SQL else sql = <<SQL SELECT * FROM #{table_name} WHERE seq_region_id = #{self.seq_region.id.to_s} AND seq_region_start >= #{self.start.to_s} AND seq_region_end <= #{self.stop.to_s} SQL end answer.push(target_class.find_by_sql(sql)) coord_system_ids_with_features.delete(self.seq_region.coord_system_id) end # Transform the original slice to other coord systems and get those # features as well. At the moment, only 'direct' projections can be made. # Later, I'm hoping to add functionality for following a path from one # coord_system to another if they're not directly linked in the assembly # table. coord_system_ids_with_features.each do |target_coord_system_id| target_slices = self.project(CoordSystem.find(target_coord_system_id).name) target_slices.each do |slice| if slice.class == Slice if inclusive sql = <<SQL SELECT * FROM #{table_name} WHERE seq_region_id = #{slice.seq_region.id.to_s} AND (( seq_region_start BETWEEN #{slice.start.to_s} AND #{slice.stop.to_s} ) OR ( seq_region_end BETWEEN #{slice.start.to_s} AND #{slice.stop.to_s} ) OR ( seq_region_start <= #{slice.start.to_s} AND seq_region_end >= #{slice.stop.to_s} ) ) SQL else sql = <<SQL SELECT * FROM #{table_name} WHERE seq_region_id = #{slice.seq_region.id.to_s} AND seq_region_start >= #{slice.start.to_s} AND seq_region_end <= #{slice.stop.to_s} SQL end answer.push(target_class.find_by_sql(sql)) end end end answer.flatten! answer.uniq! return answer end
# File lib/bio-ensembl/core/slice.rb, line 612 def get_structural_variations variation_connection() Ensembl::Variation::StructuralVariation.find(:all,:conditions => ["seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop]) end
Method to retrieve Variation
features from Ensembl::Core::Slice
objects @example
slice = Slice.fetch_by_region('chromosome',1,50000,51000) variations = slice.get_variation_features variations.each do |vf| puts vf.variation_name, vf.allele_string puts vf.variation.ancestral_allele end
# File lib/bio-ensembl/core/slice.rb, line 602 def get_variation_features variation_connection() Ensembl::Variation::VariationFeature.find(:all,:conditions => ["seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop]) end
Get the length of a slice
@example
chr4 = SeqRegion.find_by_name('4') my_slice = Slice.new(chr4, 95000, 98000, -1) puts my_slice.length
@return [Integer] Length of the slice
# File lib/bio-ensembl/core/slice.rb, line 189 def length return self.stop - self.start + 1 end
Don't use this method yourself.
# File lib/bio-ensembl/core/slice.rb, line 411 def method_missing(method_name, *args) table_name = method_name.to_s.singularize class_name = table_name.camelcase # Convert to the class object target_class = nil ObjectSpace.each_object(Class) do |o| if o.name =~ /^Ensembl::Core::#{class_name}$/ target_class = o end end # If it exists, see if it implements Sliceable if ! target_class.nil? and target_class.include?(Sliceable) inclusive = false if [TrueClass, FalseClass].include?(args[0].class) inclusive = args[0] end return self.get_objects(target_class, table_name, inclusive) end raise NoMethodError end
Get all MiscFeatures that are located on a Slice
for a given MiscSet
.
Pitfall: just looks at the CoordSystem
that the Slice
is located on. For example, if a Slice
is located on a SeqRegion
on the 'chromosome' CoordSystem
, but all misc_features
are annotated on SeqRegions of the 'scaffold' CoordSystem
, this method will return an empty array.
@example
my_slice.misc_features('encode').each do |feature| puts feature.to_yaml end
@param [String] code Code of MiscSet
@return [Array<MiscFeature>] Array of MiscFeature
objects
# File lib/bio-ensembl/core/slice.rb, line 523 def misc_features(code) answer = Array.new if code.nil? self.seq_region.misc_features.each do |mf| if mf.seq_region_start > self.start and mf.seq_region_end < self.stop answer.push(mf) end end else self.seq_region.misc_features.each do |mf| if mf.misc_sets[0].code == code if mf.seq_region_start > self.start and mf.seq_region_end < self.stop answer.push(mf) end end end end return answer end
The Slice#overlaps?
method checks if this slice overlaps another one. The other slice has to be on the same coordinate system
@example
slice_a = Slice.fetch_by_region('chromosome','X',1,1000) slice_b = Slice.fetch_by_region('chromosome','X',900,1500) if slice_a.overlaps?(slice_b) puts "There slices overlap" end
@param [Slice] other_slice Another slice @return [Boolean] True if slices overlap, otherwise false
# File lib/bio-ensembl/core/slice.rb, line 220 def overlaps?(other_slice) if ! other_slice.class == Slice raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments." end if self.seq_region.coord_system != other_slice.seq_region.coord_system raise RuntimeError, "The argument slice of Slice#overlaps? has to be in the same coordinate system, but were " + self.seq_region.coord_system.name + " and " + other_slice.seq_region.coord_system.name end self_range = self.start .. self.stop other_range = other_slice.start .. other_slice.stop if self_range.include?(other_slice.start) or other_range.include?(self.start) return true else return false end end
The Slice#project
method is used to transfer coordinates from one coordinate system to another. Suppose you have a slice on a contig in human (let's say on contig AC000031.6.1.38703) and you want to know the coordinates on the chromosome. This is a projection of coordinates from a higher ranked coordinate system to a lower ranked coordinate system. Projections can also be done from a chromosome to the contig level. However, it might be possible that more than one contig has to be included and that there exist gaps between the contigs. The output of this method therefore is an array of Slice
and Gap
objects.
At the moment, projections can only be done if the two coordinate systems are linked directly in the 'assembly' table.
@example
# Get a contig slice in cow and project to scaffold level # (i.e. going from a high rank coord system to a lower rank coord # system) source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007) target_slices = source_slice.project('scaffold') puts target_slices.length #--> 1 puts target_slices[0].display_name #--> scaffold:ChrUn.003.3522:6570:8535:1 # Get a chromosome slice in cow and project to scaffold level # (i.e. going from a low rank coord system to a higher rank coord # system) # The region 96652152..98000000 on BTA4 is covered by 2 scaffolds # that are separated by a gap. source_slice = Slice.fetch_by_region('chromosome','4', 96652152, 98000000) target_slices = source_slice.project('scaffold') puts target_slices.length #--> 3 first_bit, second_bit, third_bit = target_slices puts first_bit.display_name #--> scaffold:Btau_3.1:Chr4.003.105:42:599579:1 puts second_bit.class #--> Gap puts third_bit.display_name #--> scaffold:Btau_3.1:Chr4.003.106:1:738311:1
@param [String] coord_system_name Name of coordinate system to project
coordinates to
@return [Array<Slice, Gap>] Array of Slices and, if necessary, Gaps
# File lib/bio-ensembl/core/project.rb, line 53 def project(coord_system_name) answer = Array.new # an array of slices unless Ensembl::SESSION.coord_systems.has_key?(self.seq_region.coord_system_id) Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id] = self.seq_region.coord_system Ensembl::SESSION.coord_system_ids[Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id].name] = self.seq_region.coord_system_id end source_coord_system = Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id] target_coord_system = nil if coord_system_name == 'toplevel' target_coord_system = source_coord_system.find_toplevel elsif coord_system_name == 'seqlevel' target_coord_system = source_coord_system.find_seqlevel else unless Ensembl::SESSION.coord_system_ids.has_key?(coord_system_name) cs = source_coord_system.find_level(coord_system_name) Ensembl::SESSION.coord_systems[cs.id] = cs Ensembl::SESSION.coord_system_ids[cs.name] = cs.id end target_coord_system = Ensembl::SESSION.coord_systems[Ensembl::SESSION.coord_system_ids[coord_system_name]] end if target_coord_system.rank < source_coord_system.rank # We're going from component to assembly, which is easy. assembly_links = self.seq_region.assembly_links_as_component(source_coord_system) if assembly_links.length == 0 return [] else assembly_links.each do |assembly_link| target_seq_region = assembly_link.asm_seq_region target_start = self.start + assembly_link.asm_start - assembly_link.cmp_start target_stop = self.stop + assembly_link.asm_start - assembly_link.cmp_start target_strand = self.strand * assembly_link.ori # 1x1=>1, 1x-1=>-1, -1x-1=>1 answer.push(Slice.new(target_seq_region, target_start, target_stop, target_strand)) end end else # If we're going from assembly to component, the answer of the target method # is an array consisting of Slices intermitted with Gaps. # ASSEMBLY_EXCEPTIONS # CAUTION: there are exceptions to the assembly (stored in the assembly_exception) # table which make things a little bit more difficult... For example, # in human, the assembly data for the pseudo-autosomal region (PAR) of # Y is *not* stored in the assembly table. Instead, there is a record # in the assembly_exception table that says: "For chr Y positions 1 # to 2709520, use chr X:1-2709520 for the assembly data." # As a solution, what we'll do here, is split the assembly up in blocks: # if a slice covers both the PAR and the allosomal region, we'll make # two subslices (let's call them blocks not to intercede with the # Slice#subslices method) and project these independently. assembly_exceptions = AssemblyException.find_all_by_seq_region_id(self.seq_region.id) if assembly_exceptions.length > 0 # Check if this bit of the original slice is covered in the # assembly_exception table. overlapping_exceptions = Array.new assembly_exceptions.each do |ae| if Slice.new(self.seq_region, ae.seq_region_start, ae.seq_region_end).overlaps?(self) if ae.exc_type == 'HAP' raise NotImplementedError, "The haplotype exceptions are not implemented (yet). You can't project this slice." end overlapping_exceptions.push(ae) end end if overlapping_exceptions.length > 0 # First get all assembly blocks from chromosome Y source_assembly_blocks = self.excise(overlapping_exceptions.collect{|e| e.seq_region_start .. e.seq_region_end}) # And insert the blocks of chromosome X all_assembly_blocks = Array.new #both for chr X and Y # First do all exceptions between the first and last block previous_block = nil source_assembly_blocks.sort_by{|b| b.start}.each do |b| if previous_block.nil? all_assembly_blocks.push(b) previous_block = b next end # Find the exception record exception = nil assembly_exceptions.each do |ae| if ae.seq_region_end == b.start - 1 exception = ae break end end new_slice_start = exception.exc_seq_region_start + ( previous_block.stop - exception.seq_region_start ) new_slice_stop = exception.exc_seq_region_start + ( b.start - exception.seq_region_start ) new_slice_strand = self.strand * exception.ori new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand) all_assembly_blocks.push(new_slice) all_assembly_blocks.push(b) previous_block = b end # And then see if we have to add an additional one at the start or end first_block = source_assembly_blocks.sort_by{|b| b.start}[0] if first_block.start > self.start exception = assembly_exceptions.sort_by{|ae| ae.seq_region_start}[0] new_slice_start = exception.exc_seq_region_start + ( self.start - exception.seq_region_start ) new_slice_stop = exception.exc_seq_region_start + ( first_block.start - 1 - exception.seq_region_start ) new_slice_strand = self.strand * exception.ori new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand) all_assembly_blocks.unshift(new_slice) end last_block = source_assembly_blocks.sort_by{|b| b.start}[-1] if last_block.stop < self.stop exception = assembly_exceptions.sort_by{|ae| ae.seq_region_start}[-1] new_slice_start = exception.exc_seq_region_start + ( last_block.stop + 1 - exception.seq_region_start ) new_slice_stop = exception.exc_seq_region_start + ( self.stop - exception.seq_region_start ) new_slice_strand = self.strand * exception.ori new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand) all_assembly_blocks.shift(new_slice) end answer = Array.new all_assembly_blocks.each do |b| answer.push(b.project(coord_system_name)) end answer.flatten! return answer end end # END OF ASSEMBLY_EXCEPTIONS # Get all AssemblyLinks starting from this assembly and for which # the cmp_seq_region.coord_system is what we want. assembly_links = self.seq_region.assembly_links_as_assembly(target_coord_system) # Now reject all the components that lie _before_ the source, then # reject all the components that lie _after_ the source. # Then sort based on their positions. sorted_overlapping_assembly_links = assembly_links.reject{|al| al.asm_end < self.start}.reject{|al| al.asm_start > self.stop}.sort_by{|al| al.asm_start} if sorted_overlapping_assembly_links.length == 0 return [] end # What we'll do, is create slices for all the underlying components, # including the first and the last one. At first, the first and last # components are added in their entirety and will only be cropped afterwards. previous_stop = nil sorted_overlapping_assembly_links.each_index do |i| this_link = sorted_overlapping_assembly_links[i] if i == 0 cmp_seq_region = nil if Ensembl::SESSION.seq_regions.has_key?(this_link.cmp_seq_region_id) cmp_seq_region = Ensembl::SESSION.seq_regions[this_link.cmp_seq_region_id] else cmp_seq_region = this_link.cmp_seq_region Ensembl::SESSION.seq_regions[cmp_seq_region.id] = cmp_seq_region end answer.push(Slice.new(cmp_seq_region, this_link.cmp_start, this_link.cmp_end, this_link.ori)) next end previous_link = sorted_overlapping_assembly_links[i-1] # If there is a gap with the previous link: add a gap if this_link.asm_start > ( previous_link.asm_end + 1 ) gap_size = this_link.asm_start - previous_link.asm_end - 1 answer.push(Gap.new(target_coord_system, gap_size)) end # And add the component itself as a Slice answer.push(Slice.new(this_link.cmp_seq_region, this_link.cmp_start, this_link.cmp_end, this_link.ori)) end # Now see if we have to crop the first and/or last slice first_link = sorted_overlapping_assembly_links[0] if self.start > first_link.asm_start if first_link.ori == -1 answer[0].stop = first_link.cmp_start + ( first_link.asm_end - self.start ) else answer[0].start = first_link.cmp_start + ( self.start - first_link.asm_start ) end end last_link = sorted_overlapping_assembly_links[-1] if self.stop < last_link.asm_end if last_link.ori == -1 answer[-1].start = last_link.cmp_start + ( last_link.asm_end - self.stop) else answer[-1].stop = last_link.cmp_start + ( self.stop - last_link.asm_start ) end end # And check if we have to add Ns at the front and/or back if self.start < first_link.asm_start gap_size = first_link.asm_start - self.start answer.unshift(Gap.new(target_coord_system, gap_size)) end if self.stop > last_link.asm_end gap_size = self.stop - last_link.asm_end answer.push(Gap.new(target_coord_system, gap_size)) end end return answer end
Get all ProteinAlignFeatures that are located on a Slice
for a given Analysis
.
Pitfall: just looks at the CoordSystem
that the Slice
is located on. For example, if a Slice
is located on a SeqRegion
on the 'chromosome' CoordSystem
, but all protein_align_features
are annotated on SeqRegions of the 'scaffold' CoordSystem
, this method will return an empty array.
@example
my_slice.protein_align_features('Uniprot').each do |feature| puts feature.to_yaml end
@param [String] analysis_name Name of analysis @return [Array<ProteinAlignFeature>] Array of ProteinAlignFeature
objects
# File lib/bio-ensembl/core/slice.rb, line 580 def protein_align_features(analysis_name) if analysis_name.nil? return ProteinAlignFeature.find_by_sql('SELECT * FROM protein_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s) else analysis = Analysis.find_by_logic_name(analysis_name) return ProteinAlignFeature.find_by_sql('SELECT * FROM protein_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s + ' AND analysis_id = ' + analysis.id.to_s) end end
# File lib/bio-ensembl/core/slice.rb, line 360 def repeatmasked_seq raise NotImplementedError end
Creates overlapping subslices for a given Slice
.
@example
my_slice.split(50000, 250).each do |sub_slice| puts sub_slice.display_name end
@param [Integer] max_size Maximal size of subslices @param [Integer] overlap Overlap in bp between consecutive subslices @return [Array<Slice>] Array of Slice
objects
# File lib/bio-ensembl/core/slice.rb, line 386 def split(max_size = 100000, overlap = 0) sub_slices = Array.new i = 0 self.start.step(self.length, max_size - overlap - 1) do |i| sub_slices.push(self.sub_slice(i, i + max_size - 1)) end i -= (overlap + 1) sub_slices.push(self.sub_slice(i + max_size)) return sub_slices end
Take a sub_slice
from an existing one.
@example
my_sub_slice = my_slice.sub_slice(400,500)
@param [Integer] start Start of subslice relative to slice @param [Integer] stop Stop of subslice relative to slice @return [Slice] Slice
object
# File lib/bio-ensembl/core/slice.rb, line 372 def sub_slice(start = self.start, stop = self.stop) return self.class.new(self.seq_region, start, stop, self.strand) end
The Slice#within?
method checks if this slice is contained withing another one. The other slice has to be on the same coordinate system
@example
slice_a = Slice.fetch_by_region('chromosome','X',1,1000) slice_b = Slice.fetch_by_region('chromosome','X',900,950) if slice_b.overlaps?(slice_a) puts "Slice b is within slice a" end
@param [Slice] other_slice Another slice @return [Boolean] True if this slice is within other_slice, otherwise false
# File lib/bio-ensembl/core/slice.rb, line 250 def within?(other_slice) if ! other_slice.class == Slice raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments." end if self.seq_region.coord_system != other_slice.seq_region.coord_system raise RuntimeError, "The argument slice of Slice#overlaps? has to be in the same coordinate system, but were " + self.seq_region.coord_system.name + " and " + other_slice.seq_region.coord_system.name end self_range = self.start .. self.stop other_range = other_slice.start .. other_slice.stop if other_range.include?(self.start) and other_range.include?(self.stop) return true else return false end end
Private Instance Methods
# File lib/bio-ensembl/core/slice.rb, line 619 def variation_connection() if !Ensembl::Variation::DBConnection.connected? host,user,password,db_name,port,species,release = Ensembl::Core::DBConnection.get_info Ensembl::Variation::DBConnection.connect(species,release.to_i,:username => user, :password => password,:host => host, :port => port) end end