class PubliSci::Generators::MAF

Constants

BARCODE_INDEX
COLUMN_NAMES
COMPONENT_RANGES
TCGA_CODES

Public Class Methods

column_replace(entry,column,prefix,value=nil) click to toggle source
# File lib/publisci/generators/maf.rb, line 140
# Rewrites, in place, the cell of +entry+ that sits at +column+'s position
# in COLUMN_NAMES.
#
# entry  - Array holding one row; modified in place.
# column - column name looked up in COLUMN_NAMES.
# prefix - String combined with the cell.
# value  - optional String. When given, the cell becomes prefix + value.
#          When omitted, +prefix+ is appended AFTER the existing cell
#          contents (despite the parameter name).
#
# Returns the new cell contents.
def column_replace(entry,column,prefix,value=nil)
  position = COLUMN_NAMES.index(column)
  entry[position] = value ? prefix + value : entry[position] + prefix
end
official_symbol(hugo_symbol) click to toggle source
# File lib/publisci/generators/maf.rb, line 148
        # Looks up the approved HGNC symbol for +hugo_symbol+ by querying the
        # bio2rdf HGNC SPARQL endpoint. The query matches records whose
        # approved symbol or synonym equals +hugo_symbol+ and selects their
        # approved symbol.
        #
        # hugo_symbol - symbol text; interpolated into the query unescaped.
        #
        # Returns the +official+ binding of the first solution as a String,
        # or "" when the query yields no solutions.
        def official_symbol(hugo_symbol)
          query_text = <<-EOF

          SELECT distinct ?official where {
           {?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
           UNION
           {?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}

           ?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
          }

          EOF

          endpoint = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
          solutions = endpoint.query(query_text)
          solutions.map { |solution| solution.official }.first.to_s
        end
parse_barcode(code) click to toggle source
# File lib/publisci/generators/maf.rb, line 165
# Splits a barcode string into two fixed-position pieces.
#
# code - barcode String, e.g. "TCGA-E9-A22B-01A-11D-A159-09".
#
# Returns a two-element Array: characters 5 through 11, then everything
# from character 13 onward. For the example above that is
# ["E9-A22B", "01A-11D-A159-09"].
def parse_barcode(code)
  #TCGA-E9-A22B-01A-11D-A159-09
  leading_part = code.slice(5..11)
  trailing_part = code.slice(13..-1)
  [leading_part, trailing_part]
end
post_process(file) click to toggle source
# File lib/publisci/generators/maf.rb, line 131
# Rewrites HGNC symbol URIs in +file+ so they use the symbol returned by
# official_symbol. +file+ is passed to PubliSci::PostProcessor.process as
# both its first and second argument.
#
# file - path handed to the post processor.
#
# Lookups are memoised for the duration of one call, so official_symbol is
# invoked at most once per distinct symbol.
#
# Returns whatever PubliSci::PostProcessor.process returns.
def post_process(file)
  reg = %r{http://identifiers.org/hgnc.symbol/(\w+)}
  hugo_cache = {}
  # presumably the block receives the text captured by the (\w+) group and
  # its return value replaces the match -- verify against PostProcessor.
  PubliSci::PostProcessor.process(file, file, reg) do |g|
    hugo_cache[g] ||= official_symbol(g)
    # Previously read an undefined `cache`, raising NameError on every match.
    'http://identifiers.org/hgnc.symbol/' + hugo_cache[g]
  end
end
process_line(entry,label,options) click to toggle source
# File lib/publisci/generators/maf.rb, line 50
# Converts one MAF row into the first observation produced by
# +observations+.
#
# entry   - Array of cell values for one row. It is NOT modified: padding
#           is applied to a copy.
# label   - label for the row; passed to +observations+ as [label].
# options - Hash; reads :complex_objects, :measures, :dimensions, :codes
#           and :dataset_name, and is also forwarded whole.
#
# Steps: pad the row with nil up to COLUMN_NAMES.length - 2 cells, append
# the two pieces returned by parse_barcode for the cell at BARCODE_INDEX,
# turn the gene symbol, Entrez id and dbSNP id cells into identifiers.org
# URIs, optionally wrap cells via sio_values, then key each cell by its
# column name.
#
# Returns the first element of the +observations+ result.
def process_line(entry,label,options)
    # Array#fill mutates its receiver, so pad a copy to avoid silently
    # lengthening the caller's record.
    padded = entry.dup.fill(nil, entry.length...COLUMN_NAMES.length-2)
    entry = (padded + parse_barcode(entry[BARCODE_INDEX])).flatten

    entry[0] = "http://identifiers.org/hgnc.symbol/#{entry[0]}" if entry[0]

    # A 0 in the entrez-id column appears to mean null
    entrez_col = 1
    entry[entrez_col] = nil if entry[entrez_col] == '0'
    entry[entrez_col] = "http://identifiers.org/ncbigene/#{entry[entrez_col]}" if entry[entrez_col]

    # Only link non-novel dbSNP entries
    snp_col = COLUMN_NAMES.index('dbSNP_RS')
    if entry[snp_col] && entry[snp_col][0..1] == "rs"
      entry[snp_col] = "http://identifiers.org/dbsnp/#{entry[snp_col].gsub('rs','')}"
    end

    # optionally create typed objects using sio nodes
    entry = sio_values(entry) if options[:complex_objects]

    # Each column maps to a one-element array holding this row's cell.
    data = {}
    COLUMN_NAMES.each_with_index { |name, i| data[name] = [entry[i]] }

    observations(options[:measures],options[:dimensions],options[:codes],data,[label],options[:dataset_name],options).first
end
process_options(options) click to toggle source
# File lib/publisci/generators/maf.rb, line 41
# Fills +options+ (in place) with the component lists used by this
# generator.
#
# options - Hash to populate. :dimensions, :codes and :measures are always
#           overwritten; :dataset_name is only set when not already truthy.
#
# :dimensions and :codes refer to the same Array. :measures is every
# COLUMN_NAMES entry that is not a dimension. The generated dataset name is
# "MAF_" followed by the current nanosecond count in base 32.
#
# Returns the same +options+ Hash.
def process_options(options)
  coded_dimensions = %w{Variant_Classification Variant_Type dbSNP_Val_Status Verification_Status Validation_Status Mutation_Status Sequence_Source Sequencer}

  options[:dimensions] = coded_dimensions
  options[:codes] = coded_dimensions
  # codes and dimensions are the same list, so one subtraction removes both.
  options[:measures] = COLUMN_NAMES - coded_dimensions

  unless options[:dataset_name]
    options[:dataset_name] = "MAF_#{Time.now.nsec.to_s(32)}"
  end

  options
end
sio_values(entry) click to toggle source
# File lib/publisci/generators/maf.rb, line 79
# Replaces selected cells of +entry+ (in place) with the results of
# sio_value / sio_attribute, passing each cell's current contents together
# with a type or predicate URI.
#
# entry - Array for one row, positioned according to COLUMN_NAMES. The
#         first two cells are only wrapped when truthy; every other listed
#         column is wrapped unconditionally.
#
# Returns the same +entry+ Array.
def sio_values(entry)
  so_allele = "http://purl.org/obo/owl/SO#SO_0001023"
  faldo_position = "http://biohackathon.org/resource/faldo#Position"

  # Gene symbol, then Entrez gene link (cells 0 and 1)
  entry[0] &&= sio_value('http://edamontology.org/data_1791', entry[0])
  entry[1] &&= sio_value("http://identifiers.org/ncbigene", entry[1])

  # dbSNP id, then chromosome, then the allele columns, all as typed values
  typed_columns = [
    ['dbSNP_RS', "http://identifiers.org/dbsnp"],
    ['Chromosome', "http://purl.org/obo/owl/SO#SO_0000340"]
  ]
  %w{Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2}.each do |allele|
    typed_columns << [allele, so_allele]
  end
  typed_columns.each do |name, type_uri|
    idx = COLUMN_NAMES.index(name)
    entry[idx] = sio_value(type_uri, entry[idx])
  end

  # Strand and sequencing center as plain attributes
  [
    ["Strand", "http://edamontology.org/data_0853"],
    ["Center", "foaf:homepage"]
  ].each do |name, predicate|
    idx = COLUMN_NAMES.index(name)
    entry[idx] = sio_attribute(predicate, entry[idx])
  end

  # Use faldo for the start and end locations
  [
    ["Start_Position", "http://biohackathon.org/resource/faldo#begin"],
    ["End_Position", "http://biohackathon.org/resource/faldo#end"]
  ].each do |name, predicate|
    idx = COLUMN_NAMES.index(name)
    entry[idx] = sio_attribute(predicate, entry[idx], faldo_position)
  end

  entry
end
structure(options={}) click to toggle source
# File lib/publisci/generators/maf.rb, line 115
# Assembles the dataset structure description as one String.
#
# options - Hash of generator options; first passed through
#           process_options.
#
# The result starts with the value returned by +prefixes+, and then has
# appended to it, in order: the data structure definition, the dataset,
# every component specification, measure property, dimension property,
# code list and concept code. Code lists and concept codes are built from
# TCGA_CODES.
#
# Returns the accumulated String (the object returned by +prefixes+).
def structure(options={})
  opts = process_options(options)

  name = opts[:dataset_name]
  measures = opts[:measures]
  dimensions = opts[:dimensions]
  codes = opts[:codes]

  document = prefixes(name, opts)
  document << data_structure_definition(measures, dimensions, codes, name, opts)
  document << dataset(name, opts)

  component_specifications(measures, dimensions, codes, name, opts).each { |spec| document << spec }
  measure_properties(measures, name, opts).each { |property| document << property }
  dimension_properties(dimensions, codes, name, opts).each { |property| document << property }
  code_lists(codes, TCGA_CODES, name, opts).each { |list| document << list }
  concept_codes(codes, TCGA_CODES, name, opts).each { |concept| document << concept }

  document
end
write(record, out, label, options={}) click to toggle source
# File lib/publisci/generators/maf.rb, line 25
# Processes one record and hands the result to +write_to+.
#
# record  - row data forwarded to process_line.
# out     - destination forwarded to write_to.
# label   - row label forwarded to process_line.
# options - Hash of generator options; run through process_options, after
#           which any of :no_labels (true), :lookup_hugo (false),
#           :complex_objects (false) and :ranges (COMPONENT_RANGES) that
#           are not already truthy receive the default shown.
#
# Returns whatever write_to returns.
def write(record, out, label, options={})
  settings = process_options(options)

  { no_labels: true, lookup_hugo: false, complex_objects: false }.each do |flag, default|
    settings[flag] ||= default
  end
  settings[:ranges] ||= COMPONENT_RANGES

  converted = process_line(record, label, settings)
  write_to(out, converted)
end
write_structure(input, output, options) click to toggle source
# File lib/publisci/generators/maf.rb, line 37
# Builds the structure description for +options+ and passes it to
# +write_to+.
#
# input   - not used by this method.
# output  - destination forwarded to write_to.
# options - Hash forwarded to +structure+.
#
# Returns whatever write_to returns.
def write_structure(input, output, options)
  definition = structure(options)
  write_to(output, definition)
end