class BioDSL::Random

Pick a number of random records from the stream.

random can be used to pick a given number of random records from the stream. Note that the order of records is preserved.

Using the `pairs: true` option allows random picking of interleaved paired-end sequence records.

Usage

random(number: <uint>[, pairs: <bool>])

Options

Examples

To pick some random records from the stream do:

BD.new.
read_fasta(input: "in.fna").
random(number: 10_000).
write_fasta(output: "out.fna").
run

Constants

STATS

Public Class Methods

new(options) click to toggle source

Constructor for Random.

@param options [Hash] Options hash.

@option options [Fixnum] :number
@option options [Boolean] :pairs

@return [Random] Class instance.

# File lib/BioDSL/commands/random.rb, line 66
# Set up a new random command and validate its options.
#
# @param options [Hash] Options hash.
# @option options [Integer] :number
# @option options [Boolean] :pairs
#
# @return [Random] Class instance.
def initialize(options)
  # The set of wanted record indexes is compiled later by decide_wanted.
  @wanted  = nil
  @options = options

  check_options
end

Public Instance Methods

lmb() click to toggle source

Return command lambda for random.

@return [Proc] Command lambda.

# File lib/BioDSL/commands/random.rb, line 76
# Build the command lambda for random.
#
# When called, the lambda initializes the status via status_init, then,
# inside a TmpDir.create('random') block, serializes the input to the
# yielded temporary file, decides which records are wanted and finally
# emits those records to the output.
#
# @return [Proc] Command lambda taking input, output and status.
def lmb
  ->(input, output, status) do
    status_init(status, STATS)

    # Only the first value yielded by TmpDir.create is used here.
    TmpDir.create('random') do |tmp_file, _|
      process_input(input, tmp_file)
      decide_wanted
      process_output(output, tmp_file)
    end
  end
end

Private Instance Methods

check_options() click to toggle source

Check options.

# File lib/BioDSL/commands/random.rb, line 91
# Validate the options hash given to the constructor.
#
# Runs four checks in order: only :number and :pairs are allowed,
# :number is required, :pairs must be nil, true or false, and the
# assertion ':number > 0' must hold.
def check_options
  opts = @options

  options_allowed(opts, :number, :pairs)
  options_required(opts, :number)
  options_allowed_values(opts, pairs: [nil, true, false])
  options_assert(opts, ':number > 0')
end
decide_wanted() click to toggle source

Compile a random set of numbers.

# File lib/BioDSL/commands/random.rb, line 115
# Compile the set of record indexes to emit and store it in @wanted.
#
# With the :pairs option set, the work is delegated to decide_wanted_pairs.
# Otherwise :number distinct indexes are drawn at random from
# 0...@status[:records_in]; if fewer records were read than requested,
# all indexes are selected.
#
# @return [Set] The set of wanted indexes in the non-paired case.
def decide_wanted
  if @options[:pairs]
    decide_wanted_pairs
  else
    indexes = (0...@status[:records_in]).to_a

    # Array#sample(n) draws n distinct elements directly, so there is no
    # need to permute the whole array just to keep a prefix of it.
    @wanted = indexes.sample(@options[:number]).to_set
  end
end
decide_wanted_pairs() click to toggle source

Compile a random set of number pairs.

# File lib/BioDSL/commands/random.rb, line 125
# Compile a random set of index pairs and store it in @wanted.
#
# Records are treated as interleaved pairs: index i (even) and i + 1
# belong together. :number / 2 pairs are drawn at random, so an odd
# :number is rounded down to the nearest whole pair.
#
# Only even indexes whose mate exists are candidates. With an odd record
# count the trailing record has no mate and is never selected, so no
# orphan record is emitted.
def decide_wanted_pairs
  @wanted = Set.new
  pairs   = @options[:number] / 2

  # Stop one short of the record count so that i + 1 is always a valid
  # index, and step by 2 to visit only the first member of each pair.
  firsts = (0...@status[:records_in] - 1).step(2).to_a

  firsts.sample(pairs).each { |i| @wanted.merge([i, i + 1]) }
end
process_input(input, file) click to toggle source

Serialize records from input

@param input [Enumerator] Input stream.
@param file [String] Path to temporary file.

# File lib/BioDSL/commands/random.rb, line 102
# Write every record of the input stream to a temporary file through
# BioDSL::Serializer, counting each one in @status[:records_in].
#
# @param input [Enumerator] Input stream.
# @param file  [String]     Path to temporary file.
def process_input(input, file)
  File.open(file, 'wb') do |io|
    BioDSL::Serializer.new(io) do |serializer|
      input.each do |record|
        # Count first, then serialize.
        @status[:records_in] += 1

        serializer << record
      end
    end
  end
end
process_output(output, file) click to toggle source

Read records from temporary file and emit wanted records to the output stream.

@param output [Enumerator::Yielder] Output stream.
@param file [String] Path to temporary file with records.

# File lib/BioDSL/commands/random.rb, line 140
# Read the serialized records back from the temporary file and push the
# ones whose position is in @wanted to the output stream, counting each
# emitted record in @status[:records_out].
#
# @param output [Enumerator::Yielder] Output stream.
# @param file   [String]              Path to temporary file with records.
def process_output(output, file)
  File.open(file, 'rb') do |io|
    BioDSL::Serializer.new(io) do |serializer|
      serializer.each_with_index do |record, index|
        # Skip every record whose index was not picked.
        next unless @wanted.include?(index)

        output << record
        @status[:records_out] += 1
      end
    end
  end
end