module RGFA::LinearPaths

Methods for the RGFA class, which allow to find and merge linear paths.

Constants

LN

Public Instance Methods

linear_path(s, exclude = Set.new) click to toggle source

Find a path without branches.

The path must include segment and excludes segments in exclude. Any segment used in the returned path will be added to exclude

@param s [String|RGFA::Line::Segment] a segment name or instance @param exclude [Set<String>] a set of segment names to exclude from the path @return [Array<RGFA::SegmentEnd>]

# File lib/rgfa/linear_paths.rb, line 21
def linear_path(s, exclude = Set.new)
  s = s.to_sym
  cs = connectivity(s)
  segpath = RGFA::SegmentEndsPath.new()
  [:B, :E].each_with_index do |et, i|
    if cs[i] == 1
      exclude << s
      segpath.pop
      segpath += traverse_linear_path(RGFA::SegmentEnd.new([s, et]), exclude)
    end
  end
  return (segpath.size < 2) ? nil : segpath
end
linear_paths() click to toggle source

Find all unbranched paths in the graph.

@return [Array<Array<RGFA::SegmentEnd>>]

# File lib/rgfa/linear_paths.rb, line 38
def linear_paths
  exclude = Set.new
  retval = []
  segnames = segment_names
  progress_log_init(:linear_paths, "segments", segnames.size,
    "Detect linear paths (#{segnames.size} segments)")  if @progress
  segnames.each do |sn|
    progress_log(:linear_paths) if @progress
    next if exclude.include?(sn)
    retval << linear_path(sn, exclude)
  end
  progress_log_end(:linear_paths)
  return retval.compact
end
merge_linear_path(segpath, **options) click to toggle source

Merge a linear path, i.e. a path of segments without extra-branches @!macro [new] merge_lim

Limitations: all containments und paths involving merged segments are
deleted.

@param segpath [Array<RGFA::SegmentEnd>] a linear path, such as that

retrieved by {#linear_path}

@!macro [new] merge_options

@param options [Hash] optional keyword arguments
@option options [String, :short, nil] :merged_name (nil)
  if nil, the merged_name is automatically computed; if :short,
  a name is computed starting with "merged1" and calling next until
  an available name is founf; if String, the name to use
@option options [Boolean] :cut_counts (false)
  if true, total count in merged segment m, composed of segments
  s of set S is multiplied by the factor Sum(|s in S|)/|m|

@return [RGFA] self @see merge_linear_paths

# File lib/rgfa/linear_paths.rb, line 72
def merge_linear_path(segpath, **options)
  return if segpath.size < 2
  segpath.map!{|se|se.to_segment_end}
  if segpath[1..-2].any? {|sn,et| connectivity(sn) != [1,1]}
    raise ArgumentError, "The specified path is not linear"
  end
  merged, first_reversed, last_reversed =
    create_merged_segment(segpath, options)
  self << merged
  link_merged(merged.name, segpath.first.to_segment_end.invert_end_type,
              first_reversed)
  link_merged(merged.name, segpath.last, last_reversed)
  segpath.each do |sn_et|
    delete_segment(sn_et.segment)
    progress_log(:merge_linear_paths, 0.05) if @progress
  end
  self
end
merge_linear_paths(**options) click to toggle source

Merge all linear paths in the graph, i.e. paths of segments without extra-branches @!macro merge_lim @!macro merge_options

@return [RGFA] self

# File lib/rgfa/linear_paths.rb, line 97
def merge_linear_paths(**options)
  paths = linear_paths
  psize = paths.flatten.size / 2
  progress_log_init(:merge_linear_paths, "segments", psize,
    "Merge #{paths.size} linear paths (#{psize} segments)") if @progress
  paths.each do |path|
    merge_linear_path(path, **options)
  end
  progress_log_end(:merge_linear_paths)
  self
end

Private Instance Methods

add_segment_to_merged(merged, segment, reversed, cut, init, options) click to toggle source
# File lib/rgfa/linear_paths.rb, line 190
def add_segment_to_merged(merged, segment, reversed, cut, init, options)
  s = (reversed ? segment.sequence.rc[cut..-1] : segment.sequence[cut..-1])
  if init
    merged.sequence = s
    merged.name = (options[:merged_name].nil? ?
                   segment.name : options[:merged_name])
    merged.LN = segment.LN
  else
    (segment.sequence == "*") ? (merged.sequence = "*")
                              : (merged.sequence += s)
    if options[:merged_name].nil?
      merged.name = "#{merged.name}_#{segment.name}"
    end
    if merged.LN
      segment.LN ? merged.LN += (segment.LN - cut)
                 : merged.LN = nil
    end
  end
end
create_merged_segment(segpath, options) click to toggle source
# File lib/rgfa/linear_paths.rb, line 210
def create_merged_segment(segpath, options)
  merged = segment!(segpath.first.first).clone
  total_cut = 0
  a = segpath.first
  first_reversed = (a.end_type == :B)
  last_reversed = nil
  if options[:merged_name] == :short
    forbidden = (segment_names + path_names)
    options[:merged_name] = "merged1"
    while forbidden.include?(options[:merged_name])
      options[:merged_name] = options[:merged_name].next
    end
  end
  add_segment_to_merged(merged, segment(a.segment), first_reversed, 0, true,
                        options)
  progress_log(:merge_linear_paths, 0.95) if @progress
  (segpath.size-1).times do |i|
    b = segpath[i+1].to_segment_end.invert_end_type
    l = link!(a, b)
    if l.overlap == []
      cut = 0
    elsif l.overlap.all?{|op|[:M, :"="].include?(op.code)}
      cut = l.overlap.map(&:len).inject(:+)
    else
      raise ArgumentError,
        "Merging is only allowed if all operations are M/="
    end
    total_cut += cut
    last_reversed = (b[1] == :E)
    add_segment_to_merged(merged, segment(b.segment), last_reversed, cut,
                          false, options)
    a = b.to_segment_end.invert_end_type
    if @progress
      progress_log(:merge_linear_paths, 0.95)
    end
  end
  if merged.sequence != "*"
    if merged.LN.nil?
      merged.LN = merged.sequence.length
    elsif @validate and merged.LN != merged.sequence.length
      raise RGFA::Line::Segment::InconsistentLengthError,
            "Computed sequence length #{merged.sequence.length} "+
            "and computed LN #{merged.LN} differ"
    end
  end
  if merged.LN.nil?
    [:KC, :RC, :FC].each {|count_tag| merged.set(count_tag, nil)}
  else
    sum_of_counts(segpath, (options[:cut_counts] ?
                            merged.LN.to_f / (total_cut+merged.LN) : 1)).
        each do |count_tag, count|
      merged.set(count_tag, count)
    end
  end
  return merged, first_reversed, last_reversed
end
reverse_pos_array(pos_array, lastpos) click to toggle source
# File lib/rgfa/linear_paths.rb, line 185
def reverse_pos_array(pos_array, lastpos)
  return nil if pos_array.nil? or lastpos.nil?
  pos_array.map {|pos| lastpos - pos + 1}.reverse
end
reverse_segment_name(name, separator) click to toggle source
# File lib/rgfa/linear_paths.rb, line 172
def reverse_segment_name(name, separator)
  name.to_s.split(separator).map do |part|
    openp = part[0] == "("
    part = part[1..-1] if openp
    closep = part[-1] == ")"
    part = part[0..-2] if closep
    part = (part[-1] == "^") ? part[0..-2] : part+"^"
    part += ")" if openp
    part = "(#{part}" if closep
    part
  end.reverse.join(separator)
end
sum_of_counts(segpath, multfactor = 1) click to toggle source
# File lib/rgfa/linear_paths.rb, line 155
def sum_of_counts(segpath, multfactor = 1)
  retval = {}
  segs = segpath.map {|sn,et|segment!(sn)}
  [:KC, :RC, :FC].each do |count_tag|
    segs.each do |s|
      if s.optional_fieldnames.include?(count_tag)
        retval[count_tag] ||= 0
        retval[count_tag] += s.get(count_tag)
      end
    end
    if retval[count_tag]
      retval[count_tag] = (retval[count_tag] * multfactor).to_i
    end
  end
  return retval
end
traverse_linear_path(segment_end, exclude) click to toggle source

Traverse the links, starting from the segment from :E end if traverse_from_E_end is true, or :B end otherwise.

If any segment after from is found whose name is included in exclude the traversing is interrupted. The exclude set is updated, so that circular paths are avoided.

Arguments:

- +from+ -> first segment
- +traverse_from_E_end+ -> if true, start from E end, otherwise from B end
- +exclude+ -> Set of names of already visited segments

*Side Effects*:

- Any element added to the returned list is also added to +exclude+

Returns:

- An array of segment names of the unbranched path.
  If +from+ is not an element of an unbranched path then [].
  Otherwise the first (and possibly only) element is +from+.
  All elements in the index range 1..-2 are :internal.
# File lib/rgfa/linear_paths.rb, line 131
def traverse_linear_path(segment_end, exclude)
  list = RGFA::SegmentEndsPath.new()
  current = segment_end
  loop do
    after  = links_of(current)
    before = links_of(current.to_segment_end.invert_end_type)
    cs = connectivity_symbols(before.size, after.size)
    if cs == [1,1] or list.empty?
      list << current
      exclude << current.name
      l = after.first
      current = l.other_end(current).invert_end_type
      break if exclude.include?(current.name)
    elsif cs[0] == 1
      list << current
      exclude << current.name
      break
    else
      break
    end
  end
  return segment_end.end_type == :B ? list.reverse : list
end