class Ensembl::Core::Slice

From the perl API tutorial (www.ensembl.org/info/software/core/core_tutorial.html): “A Slice object represents a continuous region of a genome. Slices can be used to obtain sequence, features or other information from a particular region of interest.”

In contrast to almost all other classes of Ensembl::Core, the Slice class is not based on ActiveRecord.

@example

chr4 = SeqRegion.find_by_name('4')
my_slice = Slice.new(chr4, 95000, 98000, -1)
puts my_slice.display_name    #--> 'chromosome:4:Btau_3.1:95000:98000:1'

Attributes

seq[RW]
seq_region[RW]
start[RW]
stop[RW]
strand[RW]

Public Class Methods

fetch_all(coord_system_name = 'chromosome',species = Ensembl::SESSION.collection_species ,version = nil) click to toggle source

Create an array of all Slices for a given coordinate system.

@example

slices = Slice.fetch_all('chromosome')

@param [String] coord_system_name Name of coordinate system @param [String] species Name of species @param [Integer] version Version of coordinate system @return [Array<Slice>] Array of Slice objects

# File lib/bio-ensembl/core/slice.rb, line 152
def self.fetch_all(coord_system_name = 'chromosome',species = Ensembl::SESSION.collection_species ,version = nil)
  answer = Array.new
  coord_system = nil
  if Collection.check
     species = species.downcase  
     species_id = Collection.get_species_id(species)
     raise ArgumentError, "No specie found in the database with this name: #{species}" if species_id.nil?
     if version.nil?
        coord_system = Ensembl::Core::CoordSystem.find_by_name_and_species_id(coord_system_name,species_id)
     else
        coord_system = Ensembl::Core::CoordSystem.find_by_name_and_species_id_and_version(coord_system_name, species_id, version)
     end  
  else
     if version.nil?
        coord_system = Ensembl::Core::CoordSystem.find_by_name(coord_system_name)
     else
        coord_system = Ensembl::Core::CoordSystem.find_by_name_and_version(coord_system_name, version) 
     end
  end
  coord_system.seq_regions.each do |seq_region|
    answer.push(Ensembl::Core::Slice.new(seq_region))
  end
  return answer
end
fetch_by_gene_stable_id(gene_stable_id, flanking_seq_length = 0) click to toggle source

Create a Slice based on a Gene

@example

my_slice = Slice.fetch_by_gene_stable_id('ENSG00000184895')

@param [String] gene_stable_id Ensembl gene stable ID @param [Integer] flanking_seq_length Length of the flanking sequence @return [Slice] Slice object

# File lib/bio-ensembl/core/slice.rb, line 119
def self.fetch_by_gene_stable_id(gene_stable_id, flanking_seq_length = 0)
  gene_stable_id = Ensembl::Core::GeneStableId.find_by_stable_id(gene_stable_id)
  gene = gene_stable_id.gene
  seq_region = gene.seq_region

  return Ensembl::Core::Slice.new(seq_region, gene.seq_region_start - flanking_seq_length, gene.seq_region_end + flanking_seq_length, gene.seq_region_strand)
end
fetch_by_region(coord_system_name, seq_region_name, start = nil, stop = nil, strand = 1, species = Ensembl::SESSION.collection_species ,version = nil) click to toggle source

Create a Slice without first creating the SeqRegion object.

@example

my_slice_1 = Slice.fetch_by_region('chromosome','4',95000,98000,1)

@param [String] coord_system_name Name of coordinate system @param [String] seq_region_name name of the seq_region @param [Integer] start Start position of the slice on the seq_region @param [Integer] stop Stop position of the slice on the seq_region @param [Integer] strand Strand that the slice should be @param [String] species Name of species in case of multi-species database @param [Integer] version Version number of the coordinate system @return [Slice] Slice object

# File lib/bio-ensembl/core/slice.rb, line 74
def self.fetch_by_region(coord_system_name, seq_region_name, start = nil, stop = nil, strand = 1, species = Ensembl::SESSION.collection_species ,version = nil)
  all_coord_systems = nil
  if Collection.check
    species = species.downcase
    if species.nil?
      raise ArgumentError, "When using multi-species db, you must pass a species name to get the correct Slice"
    else
      species_id = Collection.get_species_id(species)
      raise ArgumentError, "No species found in the database with this name: #{species}" if species_id.nil? 
      all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name_and_species_id(coord_system_name,species_id)
    end
  else
    all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name(coord_system_name)
  end
  coord_system = nil
  if version.nil? # Take the version with the lower rank
    coord_system = all_coord_systems.sort_by{|cs| cs.rank}.shift
  else
    coord_system = all_coord_systems.select{|cs| cs.version == version}[0]
  end
  unless coord_system.class == Ensembl::Core::CoordSystem
    message = "Couldn't find a Ensembl::Core::CoordSystem object with name '" + coord_system_name + "'"
    if ! version.nil?
      message += " and version '" + version + "'"
    end
    raise message
  end

  seq_region = Ensembl::Core::SeqRegion.find_by_name_and_coord_system_id(seq_region_name, coord_system.id)
  #seq_region = Ensembl::Core::SeqRegion.find_by_sql("SELECT * FROM seq_region WHERE name = '" + seq_region_name + "' AND coord_system_id = " + coord_system.id.to_s)[0]
  unless seq_region.class == Ensembl::Core::SeqRegion
    raise "Couldn't find a Ensembl::Core::SeqRegion object with the name '" + seq_region_name + "'"
  end

  return Ensembl::Core::Slice.new(seq_region, start, stop, strand)
end
fetch_by_transcript_stable_id(transcript_stable_id, flanking_seq_length = 0) click to toggle source

Create a Slice based on a Transcript

@example

my_slice = Slice.fetch_by_transcript_stable_id('ENST00000383673')

@param [String] transcript_stable_id Ensembl transcript stable ID @param [Integer] flanking_seq_length Length of the flanking sequence @return [Slice] Slice object

# File lib/bio-ensembl/core/slice.rb, line 135
def self.fetch_by_transcript_stable_id(transcript_stable_id, flanking_seq_length = 0)
  transcript_stable_id = Ensembl::Core::TranscriptStableId.find_by_stable_id(transcript_stable_id)
  transcript = transcript_stable_id.transcript
  seq_region = transcript.seq_region

  return Ensembl::Core::Slice.new(seq_region, transcript.seq_region_start - flanking_seq_length, transcript.seq_region_end + flanking_seq_length, transcript.seq_region_strand)
end
new(seq_region, start = 1, stop = seq_region.length, strand = 1) click to toggle source

Create a new Slice object from scratch.

@example

chr4 = SeqRegion.find_by_name('4')
my_slice = Slice.new(chr4, 95000, 98000, -1)

@param [SeqRegion] seq_region SeqRegion object @param [Integer] start Start position of the slice on the seq_region @param [Integer] stop Stop position of the slice on the seq_region @param [Integer] strand Strand that the slice should be @return [Slice] Slice object

# File lib/bio-ensembl/core/slice.rb, line 47
def initialize(seq_region, start = 1, stop = seq_region.length, strand = 1)
  if start.nil?
    start = 1
  end
  if stop.nil?
    stop = seq_region.length
  end
  unless seq_region.class == Ensembl::Core::SeqRegion
    raise 'First argument has to be a Ensembl::Core::SeqRegion object'
  end
  @seq_region, @start, @stop, @strand = seq_region, start, stop, strand
  @seq = nil
end

Public Instance Methods

display_name() click to toggle source

The display_name method returns a full name of this slice, containing the name of the coordinate system, the sequence region, start and stop positions on that sequence region and the strand. E.g. for a slice of bovine chromosome 4 from position 95000 to 98000 on the reverse strand, the display_name would look like: chromosome:4:Btau_3.1:95000:98000:-1

@example

puts my_slice.display_name

@return [String] Nicely formatted name of the Slice

# File lib/bio-ensembl/core/slice.rb, line 203
def display_name
  return [self.seq_region.coord_system.name, self.seq_region.coord_system.version, self.seq_region.name, self.start.to_s, self.stop.to_s, self.strand.to_s].join(':')
end
Also aliased as: to_s
dna_align_features(analysis_name = nil) click to toggle source

Get all DnaAlignFeatures that are located on a Slice for a given Analysis.

Pitfall: just looks at the CoordSystem that the Slice is located on. For example, if a Slice is located on a SeqRegion on the 'chromosome' CoordSystem, but all dna_align_features are annotated on SeqRegions of the 'scaffold' CoordSystem, this method will return an empty array.

@example

my_slice.dna_align_features('Vertrna').each do |feature|
  puts feature.to_yaml
end

@param [String] analysis_name Name of analysis @return [Array<DnaAlignFeature>] Array of DnaAlignFeature objects

# File lib/bio-ensembl/core/slice.rb, line 557
def dna_align_features(analysis_name = nil)
  if analysis_name.nil?
    return DnaAlignFeature.find_by_sql('SELECT * FROM dna_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s)
  else
    analysis = Analysis.find_by_logic_name(analysis_name)
    return DnaAlignFeature.find_by_sql('SELECT * FROM dna_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s + ' AND analysis_id = ' + analysis.id.to_s)
  end
end
excise(ranges) click to toggle source

The Slice#excise method removes a bit of a slice and returns the remainder as separate slices.

@example

original_slice = Slice.fetch_by_region('chromosome','X',1,10000)
new_slices = original_slice.excise([500..750, 1050..1075])
new_slices.each do |s|
  puts s.display_name
end

# result:
#   chromosome:X:1:499:1
#   chromosome:X:751:1049:1
#   chromosome:X:1076:10000:1

@param [Array<Range>] Array of ranges to excise @return [Array<Slice>] Array of slices

# File lib/bio-ensembl/core/slice.rb, line 285
def excise(ranges)
  if ranges.class != Array
    raise RuntimeError, "Argument should be an array of ranges"
  end
  ranges.each do |r|
    if r.class != Range
      raise RuntimeError, "Argument should be an array of ranges"
    end
  end

  answer = Array.new
  previous_excised_stop = self.start - 1
  ranges.sort_by{|r| r.first}.each do |r|
    subslice_start = previous_excised_stop + 1
    if subslice_start <= r.first - 1
      answer.push(Slice.new(self.seq_region, subslice_start, r.first - 1))
    end
    previous_excised_stop = r.last
    if r.last > self.stop
      return answer
    end
  end
  subslice_start = previous_excised_stop + 1
  answer.push(Slice.new(self.seq_region, subslice_start, self.stop))
  return answer
end
get_genotyped_variation_features() click to toggle source
# File lib/bio-ensembl/core/slice.rb, line 607
def get_genotyped_variation_features
  variation_connection()
  Ensembl::Variation::VariationFeature.find(:all,:conditions => ["flags = 'genotyped' AND seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop])
end
get_objects(target_class, table_name, inclusive = false) click to toggle source

Don't use this method yourself.

# File lib/bio-ensembl/core/slice.rb, line 437
      def get_objects(target_class, table_name, inclusive = false)
        answer = Array.new

        coord_system_ids_with_features = nil
        # Get all the coord_systems with this type of features on them
        if Collection.check
          coord_system_ids_with_features = Collection.find_all_coord_by_table_name(table_name,self.seq_region.coord_system.species_id).collect{|mc| mc.coord_system_id}
        else
          coord_system_ids_with_features = MetaCoord.find_all_by_table_name(table_name).collect{|mc| mc.coord_system_id}
        end  
        # Get the features of the original slice
        if coord_system_ids_with_features.include?(self.seq_region.coord_system_id)
          sql = ''
          if inclusive
            sql = <<SQL
SELECT * FROM #{table_name}
WHERE seq_region_id = #{self.seq_region.id.to_s}
AND (( seq_region_start BETWEEN #{self.start.to_s} AND #{self.stop.to_s} )
OR   ( seq_region_end BETWEEN #{self.start.to_s} AND #{self.stop.to_s} )
OR   ( seq_region_start <= #{self.start.to_s} AND seq_region_end >= #{self.stop.to_s} )
    )
SQL
          else
            sql = <<SQL
SELECT * FROM #{table_name}
WHERE seq_region_id = #{self.seq_region.id.to_s}
AND seq_region_start >= #{self.start.to_s}
AND seq_region_end <= #{self.stop.to_s}   
SQL
          end
          answer.push(target_class.find_by_sql(sql))
          coord_system_ids_with_features.delete(self.seq_region.coord_system_id)
        end

        # Transform the original slice to other coord systems and get those
        # features as well. At the moment, only 'direct' projections can be made.
        # Later, I'm hoping to add functionality for following a path from one
        # coord_system to another if they're not directly linked in the assembly
        # table.
        coord_system_ids_with_features.each do |target_coord_system_id|
          target_slices = self.project(CoordSystem.find(target_coord_system_id).name)
          target_slices.each do |slice|
            if slice.class == Slice
              if inclusive
                sql = <<SQL
SELECT * FROM #{table_name}
WHERE seq_region_id = #{slice.seq_region.id.to_s}
AND (( seq_region_start BETWEEN #{slice.start.to_s} AND #{slice.stop.to_s} )
OR   ( seq_region_end BETWEEN #{slice.start.to_s} AND #{slice.stop.to_s} )
OR   ( seq_region_start <= #{slice.start.to_s} AND seq_region_end >= #{slice.stop.to_s} )
    )
SQL
              else
                sql = <<SQL
SELECT * FROM #{table_name}
WHERE seq_region_id = #{slice.seq_region.id.to_s}
AND seq_region_start >= #{slice.start.to_s}
AND seq_region_end <= #{slice.stop.to_s}   
SQL
              end 
                answer.push(target_class.find_by_sql(sql))
            end
          end
        end

        answer.flatten!
        answer.uniq!

        return answer
      end
get_structural_variations() click to toggle source
# File lib/bio-ensembl/core/slice.rb, line 612
def get_structural_variations
  variation_connection()
  Ensembl::Variation::StructuralVariation.find(:all,:conditions => ["seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop])
end
get_variation_features() click to toggle source

Method to retrieve Variation features from Ensembl::Core::Slice objects @example

slice = Slice.fetch_by_region('chromosome',1,50000,51000)
variations = slice.get_variation_features
variations.each do |vf|
 puts vf.variation_name, vf.allele_string
 puts vf.variation.ancestral_allele
end
# File lib/bio-ensembl/core/slice.rb, line 602
def get_variation_features
  variation_connection()
  Ensembl::Variation::VariationFeature.find(:all,:conditions => ["seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop])
end
length() click to toggle source

Get the length of a slice

@example

chr4 = SeqRegion.find_by_name('4')
my_slice = Slice.new(chr4, 95000, 98000, -1)
puts my_slice.length

@return [Integer] Length of the slice

# File lib/bio-ensembl/core/slice.rb, line 189
def length
  return self.stop - self.start + 1
end
method_missing(method_name, *args) click to toggle source

Don't use this method yourself.

# File lib/bio-ensembl/core/slice.rb, line 411
def method_missing(method_name, *args)
  table_name = method_name.to_s.singularize
  class_name = table_name.camelcase

  # Convert to the class object
  target_class = nil
  ObjectSpace.each_object(Class) do |o|
    if o.name =~ /^Ensembl::Core::#{class_name}$/
      target_class = o
    end
  end

  # If it exists, see if it implements Sliceable
  if ! target_class.nil? and target_class.include?(Sliceable)
    inclusive = false
    if [TrueClass, FalseClass].include?(args[0].class)
      inclusive = args[0]
    end
    return self.get_objects(target_class, table_name, inclusive)
  end

  raise NoMethodError

end
misc_features(code) click to toggle source

Get all MiscFeatures that are located on a Slice for a given MiscSet.

Pitfall: just looks at the CoordSystem that the Slice is located on. For example, if a Slice is located on a SeqRegion on the 'chromosome' CoordSystem, but all misc_features are annotated on SeqRegions of the 'scaffold' CoordSystem, this method will return an empty array.

@example

my_slice.misc_features('encode').each do |feature|
  puts feature.to_yaml
end

@param [String] code Code of MiscSet @return [Array<MiscFeature>] Array of MiscFeature objects

# File lib/bio-ensembl/core/slice.rb, line 523
def misc_features(code)
  answer = Array.new
  if code.nil?
    self.seq_region.misc_features.each do |mf|
      if mf.seq_region_start > self.start and mf.seq_region_end < self.stop
        answer.push(mf)
      end
    end
  else
    self.seq_region.misc_features.each do |mf|
      if mf.misc_sets[0].code == code
        if mf.seq_region_start > self.start and mf.seq_region_end < self.stop
          answer.push(mf)
        end
      end
    end
  end
  return answer
end
overlaps?(other_slice) click to toggle source

The Slice#overlaps? method checks if this slice overlaps another one. The other slice has to be on the same coordinate system

@example

slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
slice_b = Slice.fetch_by_region('chromosome','X',900,1500)
if slice_a.overlaps?(slice_b)
  puts "There slices overlap"
end

@param [Slice] other_slice Another slice @return [Boolean] True if slices overlap, otherwise false

# File lib/bio-ensembl/core/slice.rb, line 220
def overlaps?(other_slice)
  if ! other_slice.class == Slice
    raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments."
  end
  if self.seq_region.coord_system != other_slice.seq_region.coord_system
    raise RuntimeError, "The argument slice of Slice#overlaps? has to be in the same coordinate system, but were " + self.seq_region.coord_system.name + " and " + other_slice.seq_region.coord_system.name
  end

  self_range = self.start .. self.stop
  other_range = other_slice.start .. other_slice.stop

  if self_range.include?(other_slice.start) or other_range.include?(self.start)
    return true
  else
    return false
  end
end
project(coord_system_name) click to toggle source

The Slice#project method is used to transfer coordinates from one coordinate system to another. Suppose you have a slice on a contig in human (let's say on contig AC000031.6.1.38703) and you want to know the coordinates on the chromosome. This is a projection of coordinates from a higher ranked coordinate system to a lower ranked coordinate system. Projections can also be done from a chromosome to the contig level. However, it might be possible that more than one contig has to be included and that there exist gaps between the contigs. The output of this method therefore is an array of Slice and Gap objects.

At the moment, projections can only be done if the two coordinate systems are linked directly in the 'assembly' table.

@example

# Get a contig slice in cow and project to scaffold level
# (i.e. going from a high rank coord system to a lower rank coord
# system)
source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007)
target_slices = source_slice.project('scaffold')
puts target_slices.length          #--> 1
puts target_slices[0].display_name #--> scaffold:ChrUn.003.3522:6570:8535:1

# Get a chromosome slice in cow and project to scaffold level
# (i.e. going from a low rank coord system to a higher rank coord
# system)
# The region 96652152..98000000 on BTA4 is covered by 2 scaffolds
# that are separated by a gap.
source_slice = Slice.fetch_by_region('chromosome','4', 96652152, 98000000)
target_slices = source_slice.project('scaffold')
puts target_slices.length          #--> 3
first_bit, second_bit, third_bit = target_slices
puts first_bit.display_name        #--> scaffold:Btau_3.1:Chr4.003.105:42:599579:1
puts second_bit.class              #--> Gap
puts third_bit.display_name        #--> scaffold:Btau_3.1:Chr4.003.106:1:738311:1

@param [String] coord_system_name Name of coordinate system to project

coordinates to

@return [Array<Slice, Gap>] Array of Slices and, if necessary, Gaps

# File lib/bio-ensembl/core/project.rb, line 53
def project(coord_system_name)
  answer = Array.new # an array of slices
  unless Ensembl::SESSION.coord_systems.has_key?(self.seq_region.coord_system_id)
    Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id] = self.seq_region.coord_system
    Ensembl::SESSION.coord_system_ids[Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id].name] = self.seq_region.coord_system_id
  end
  source_coord_system = Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id]
  target_coord_system = nil
  if coord_system_name == 'toplevel'
    target_coord_system = source_coord_system.find_toplevel
  elsif coord_system_name == 'seqlevel'
    target_coord_system = source_coord_system.find_seqlevel
  else
    unless Ensembl::SESSION.coord_system_ids.has_key?(coord_system_name)
      cs = source_coord_system.find_level(coord_system_name)
      Ensembl::SESSION.coord_systems[cs.id] = cs
      Ensembl::SESSION.coord_system_ids[cs.name] = cs.id
    end
    target_coord_system = Ensembl::SESSION.coord_systems[Ensembl::SESSION.coord_system_ids[coord_system_name]]
  end

  if target_coord_system.rank < source_coord_system.rank
    # We're going from component to assembly, which is easy.
    assembly_links = self.seq_region.assembly_links_as_component(source_coord_system)
    
    if assembly_links.length == 0
      return []
    else
      assembly_links.each do |assembly_link|
        target_seq_region = assembly_link.asm_seq_region
        target_start = self.start + assembly_link.asm_start - assembly_link.cmp_start
        target_stop = self.stop + assembly_link.asm_start - assembly_link.cmp_start
        target_strand = self.strand * assembly_link.ori # 1x1=>1, 1x-1=>-1, -1x-1=>1
        
        answer.push(Slice.new(target_seq_region, target_start, target_stop, target_strand))
      end
    end
    
  else
    # If we're going from assembly to component, the answer of the target method
    # is an array consisting of Slices intermitted with Gaps.

    # ASSEMBLY_EXCEPTIONS
    # CAUTION: there are exceptions to the assembly (stored in the assembly_exception)
    # table which make things a little bit more difficult... For example,
    # in human, the assembly data for the pseudo-autosomal region (PAR) of
    # Y is *not* stored in the assembly table. Instead, there is a record
    # in the assembly_exception table that says: "For chr Y positions 1
    # to 2709520, use chr X:1-2709520 for the assembly data."
    # As a solution, what we'll do here, is split the assembly up in blocks:
    # if a slice covers both the PAR and the allosomal region, we'll make
    # two subslices (let's call them blocks not to intercede with the
    # Slice#subslices method) and project these independently.
    assembly_exceptions = AssemblyException.find_all_by_seq_region_id(self.seq_region.id)
    if assembly_exceptions.length > 0
      # Check if this bit of the original slice is covered in the
      # assembly_exception table.
      overlapping_exceptions = Array.new
      assembly_exceptions.each do |ae|
        if Slice.new(self.seq_region, ae.seq_region_start, ae.seq_region_end).overlaps?(self)
          if ae.exc_type == 'HAP'
            raise NotImplementedError, "The haplotype exceptions are not implemented (yet). You can't project this slice."
          end
          overlapping_exceptions.push(ae)
        end
      end

      if overlapping_exceptions.length > 0
        # First get all assembly blocks from chromosome Y
        source_assembly_blocks = self.excise(overlapping_exceptions.collect{|e| e.seq_region_start .. e.seq_region_end})
        # And insert the blocks of chromosome X
        all_assembly_blocks = Array.new #both for chr X and Y
        # First do all exceptions between the first and last block
        previous_block = nil
        source_assembly_blocks.sort_by{|b| b.start}.each do |b|
          if previous_block.nil?
            all_assembly_blocks.push(b)
            previous_block = b
            next
          end
          # Find the exception record
          exception = nil
          assembly_exceptions.each do |ae|
            if ae.seq_region_end == b.start - 1
              exception = ae
              break
            end
          end

          new_slice_start = exception.exc_seq_region_start + ( previous_block.stop - exception.seq_region_start )
          new_slice_stop = exception.exc_seq_region_start + ( b.start - exception.seq_region_start )
          new_slice_strand = self.strand * exception.ori
          new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

          all_assembly_blocks.push(new_slice)
          all_assembly_blocks.push(b)
          previous_block = b
        end

        # And then see if we have to add an additional one at the start or end
        first_block = source_assembly_blocks.sort_by{|b| b.start}[0]
        if first_block.start > self.start
          exception = assembly_exceptions.sort_by{|ae| ae.seq_region_start}[0]
          new_slice_start = exception.exc_seq_region_start + ( self.start - exception.seq_region_start )
          new_slice_stop = exception.exc_seq_region_start + ( first_block.start - 1 - exception.seq_region_start )
          new_slice_strand = self.strand * exception.ori
          new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

          all_assembly_blocks.unshift(new_slice)
        end

        last_block = source_assembly_blocks.sort_by{|b| b.start}[-1]
        if last_block.stop < self.stop
          exception = assembly_exceptions.sort_by{|ae| ae.seq_region_start}[-1]
          new_slice_start = exception.exc_seq_region_start + ( last_block.stop + 1 - exception.seq_region_start )
          new_slice_stop = exception.exc_seq_region_start + ( self.stop - exception.seq_region_start )
          new_slice_strand = self.strand * exception.ori
          new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

          all_assembly_blocks.shift(new_slice)
        end

        answer = Array.new
        all_assembly_blocks.each do |b|
          answer.push(b.project(coord_system_name))
        end
        answer.flatten!

        return answer
      end

    end
    # END OF ASSEMBLY_EXCEPTIONS

    # Get all AssemblyLinks starting from this assembly and for which
    # the cmp_seq_region.coord_system is what we want.
    assembly_links = self.seq_region.assembly_links_as_assembly(target_coord_system)

    # Now reject all the components that lie _before_ the source, then
    # reject all the components that lie _after_ the source.
    # Then sort based on their positions.
    sorted_overlapping_assembly_links = assembly_links.reject{|al| al.asm_end < self.start}.reject{|al| al.asm_start > self.stop}.sort_by{|al| al.asm_start}
    if sorted_overlapping_assembly_links.length == 0
      return []
    end

    # What we'll do, is create slices for all the underlying components,
    # including the first and the last one. At first, the first and last
    # components are added in their entirety and will only be cropped afterwards.
    previous_stop = nil
    sorted_overlapping_assembly_links.each_index do |i|
      this_link = sorted_overlapping_assembly_links[i]
      if i == 0
        cmp_seq_region = nil
        if Ensembl::SESSION.seq_regions.has_key?(this_link.cmp_seq_region_id)
          cmp_seq_region = Ensembl::SESSION.seq_regions[this_link.cmp_seq_region_id]
        else
          cmp_seq_region = this_link.cmp_seq_region
          Ensembl::SESSION.seq_regions[cmp_seq_region.id] = cmp_seq_region
        end
        answer.push(Slice.new(cmp_seq_region, this_link.cmp_start, this_link.cmp_end, this_link.ori))
          next
      end
      previous_link = sorted_overlapping_assembly_links[i-1]

      # If there is a gap with the previous link: add a gap
      if this_link.asm_start > ( previous_link.asm_end + 1 )
        gap_size = this_link.asm_start - previous_link.asm_end - 1
        answer.push(Gap.new(target_coord_system, gap_size))
      end

      # And add the component itself as a Slice
      answer.push(Slice.new(this_link.cmp_seq_region, this_link.cmp_start, this_link.cmp_end, this_link.ori))
    end

    # Now see if we have to crop the first and/or last slice
    first_link = sorted_overlapping_assembly_links[0]
    if self.start > first_link.asm_start
      if first_link.ori == -1
        answer[0].stop = first_link.cmp_start + ( first_link.asm_end - self.start )
      else
        answer[0].start = first_link.cmp_start + ( self.start - first_link.asm_start )
      end
    end

    last_link = sorted_overlapping_assembly_links[-1]
    if self.stop < last_link.asm_end
      if last_link.ori == -1
        answer[-1].start = last_link.cmp_start + ( last_link.asm_end - self.stop)
      else
        answer[-1].stop = last_link.cmp_start + ( self.stop - last_link.asm_start )
      end
    end

    # And check if we have to add Ns at the front and/or back
    if self.start < first_link.asm_start
      gap_size = first_link.asm_start - self.start
      answer.unshift(Gap.new(target_coord_system, gap_size))
    end
    if self.stop > last_link.asm_end
      gap_size = self.stop - last_link.asm_end
      answer.push(Gap.new(target_coord_system, gap_size))
    end
  end
  return answer

end
protein_align_features(analysis_name) click to toggle source

Get all ProteinAlignFeatures that are located on a Slice for a given Analysis.

Pitfall: just looks at the CoordSystem that the Slice is located on. For example, if a Slice is located on a SeqRegion on the 'chromosome' CoordSystem, but all protein_align_features are annotated on SeqRegions of the 'scaffold' CoordSystem, this method will return an empty array.

@example

my_slice.protein_align_features('Uniprot').each do |feature|
  puts feature.to_yaml
end

@param [String] analysis_name Name of analysis @return [Array<ProteinAlignFeature>] Array of ProteinAlignFeature objects

# File lib/bio-ensembl/core/slice.rb, line 580
def protein_align_features(analysis_name)
  if analysis_name.nil?
    return ProteinAlignFeature.find_by_sql('SELECT * FROM protein_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s)
  else
    analysis = Analysis.find_by_logic_name(analysis_name)
    return ProteinAlignFeature.find_by_sql('SELECT * FROM protein_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s + ' AND analysis_id = ' + analysis.id.to_s)
  end
end
repeatmasked_seq() click to toggle source
# File lib/bio-ensembl/core/slice.rb, line 360
def repeatmasked_seq
  raise NotImplementedError
end
split(max_size = 100000, overlap = 0) click to toggle source

Creates overlapping subslices for a given Slice.

@example

my_slice.split(50000, 250).each do |sub_slice|
  puts sub_slice.display_name
end

@param [Integer] max_size Maximal size of subslices @param [Integer] overlap Overlap in bp between consecutive subslices @return [Array<Slice>] Array of Slice objects

# File lib/bio-ensembl/core/slice.rb, line 386
def split(max_size = 100000, overlap = 0)
  sub_slices = Array.new
  i = 0
  self.start.step(self.length, max_size - overlap - 1) do |i|
    sub_slices.push(self.sub_slice(i, i + max_size - 1))
  end
  i -= (overlap + 1)
  sub_slices.push(self.sub_slice(i + max_size))
  return sub_slices
end
sub_slice(start = self.start, stop = self.stop) click to toggle source

Take a sub_slice from an existing one.

@example

my_sub_slice = my_slice.sub_slice(400,500)

@param [Integer] start Start of subslice relative to slice @param [Integer] stop Stop of subslice relative to slice @return [Slice] Slice object

# File lib/bio-ensembl/core/slice.rb, line 372
def sub_slice(start = self.start, stop = self.stop)
  return self.class.new(self.seq_region, start, stop, self.strand)
end
to_s()
Alias for: display_name
within?(other_slice) click to toggle source

The Slice#within? method checks if this slice is contained withing another one. The other slice has to be on the same coordinate system

@example

slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
slice_b = Slice.fetch_by_region('chromosome','X',900,950)
if slice_b.overlaps?(slice_a)
  puts "Slice b is within slice a"
end

@param [Slice] other_slice Another slice @return [Boolean] True if this slice is within other_slice, otherwise false

# File lib/bio-ensembl/core/slice.rb, line 250
def within?(other_slice)
  if ! other_slice.class == Slice
    raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments."
  end
  if self.seq_region.coord_system != other_slice.seq_region.coord_system
    raise RuntimeError, "The argument slice of Slice#overlaps? has to be in the same coordinate system, but were " + self.seq_region.coord_system.name + " and " + other_slice.seq_region.coord_system.name
  end

  self_range = self.start .. self.stop
  other_range = other_slice.start .. other_slice.stop

  if other_range.include?(self.start) and other_range.include?(self.stop)
    return true
  else
    return false
  end
end

Private Instance Methods

variation_connection() click to toggle source
# File lib/bio-ensembl/core/slice.rb, line 619
def variation_connection()
  if !Ensembl::Variation::DBConnection.connected?  
    host,user,password,db_name,port,species,release = Ensembl::Core::DBConnection.get_info
    Ensembl::Variation::DBConnection.connect(species,release.to_i,:username => user, :password => password,:host => host, :port => port)
  end
  
end