class PepXML

require 'rexml/document'
require 'rexml/xpath'

Attributes

file_name[RW]

Public Class Methods

new(file_name) click to toggle source
# File lib/protk/pepxml.rb, line 13
# Parse the pepXML document at +file_name+ and record how its elements
# have to be addressed in later XPath queries.
#
# file_name:: path of the file to parse; also stored in @file_name.
#
# XML::Error::QUIET_HANDLER is installed as the XML error handler before
# parsing. When the root element declares a default namespace, the
# "xmlns:" prefix and the pepXML namespace string are remembered;
# otherwise the prefix is empty and the namespace is nil.
def initialize(file_name)
  @file_name = file_name

  XML::Error.set_handler(&XML::Error::QUIET_HANDLER)
  @pepxml_doc = XML::Parser.file("#{file_name}").parse

  namespaced = @pepxml_doc.root.namespaces.default
  @pepxml_ns_prefix = namespaced ? "xmlns:" : ""
  @pepxml_ns = namespaced ? "xmlns:http://regis-web.systemsbiology.net/pepXML" : nil
end

Public Instance Methods

extract_db() click to toggle source

Obtain the database name from the given input file

# File lib/protk/pepxml.rb, line 32
# Obtain the database name from the input file.
#
# Streams through the file with XML::Reader and stops at the first node
# named +search_database+ (pep.xml files, returns its +local_path+
# attribute) or +protein_summary_header+ (prot.xml files, returns its
# +reference_database+ attribute).
#
# Returns the attribute value, or nil when neither node is present or the
# attribute is missing.
# Raises RuntimeError when no reader could be created for the file.
def extract_db
  reader = XML::Reader.file(self.file_name)
  # raise, not throw: throw is for catch/throw control flow and a String
  # tag can never be caught, so it only ever produced UncaughtThrowError.
  raise "Failed to open xml file #{file_name}" if reader.nil?

  begin
    while reader.read
      case reader.name
      when "search_database"        # pep.xml files
        return reader.expand['local_path']
      when "protein_summary_header" # prot.xml files
        return reader.expand['reference_database']
      end
    end
    nil
  ensure
    # Close on every exit path, including "nothing found" and read errors.
    reader.close
  end
end
extract_engine() click to toggle source

Obtain the search engine name from the input file. The name of the engine is returned in lowercase and should contain no spaces. Names of common engines are searched for and extracted in simplified form if possible.

# File lib/protk/pepxml.rb, line 67
# Obtain the search engine name from the input file.
#
# Streams through the file with XML::Reader and stops at the first node
# named +search_summary+. Its +search_engine+ attribute is normalised:
# spaces become underscores, the characters "(", ")" and "!" are removed,
# and the result is lowercased.
#
# Returns the normalised name, or nil when no +search_summary+ node is
# found or the attribute is missing.
# Raises RuntimeError when no reader could be created for the file.
def extract_engine
  reader = XML::Reader.file(self.file_name)
  # raise, not throw: throw is for catch/throw control flow and a String
  # tag can never be caught, so it only ever produced UncaughtThrowError.
  raise "Failed to open xml file #{file_name}" if reader.nil?

  begin
    while reader.read
      next unless reader.name == "search_summary"

      engine = reader.expand['search_engine']
      # A missing attribute yields nil instead of a NoMethodError.
      return engine && engine.tr(' ', '_').delete('()!').downcase
    end
    nil
  ensure
    # Close on every exit path, including "nothing found" and read errors.
    reader.close
  end
end
extract_enzyme() click to toggle source
# File lib/protk/pepxml.rb, line 86
# Obtain the enzyme name from the input file.
#
# Streams through the file with XML::Reader and stops at the first node
# named +sample_enzyme+, returning its +name+ attribute in lowercase.
#
# Returns the lowercased name, or nil when no +sample_enzyme+ node is
# found or the attribute is missing.
# Raises RuntimeError when no reader could be created for the file.
def extract_enzyme
  reader = XML::Reader.file(self.file_name)
  # raise, not throw: throw is for catch/throw control flow and a String
  # tag can never be caught, so it only ever produced UncaughtThrowError.
  raise "Failed to open xml file #{file_name}" if reader.nil?

  begin
    while reader.read
      next unless reader.name == "sample_enzyme"

      enzyme = reader.expand['name']
      # A missing attribute yields nil instead of a NoMethodError.
      return enzyme && enzyme.downcase
    end
    nil
  ensure
    # Close on every exit path, including "nothing found" and read errors.
    reader.close
  end
end
find_runs() click to toggle source

TODO: Make this faster and more memory efficient by using XML::Reader as in the functions above

# File lib/protk/pepxml.rb, line 146
# Collect the distinct runs described by the msms_run_summary elements of
# the parsed document.
#
# Returns a Hash keyed by each summary's +base_name+ attribute; every value
# is a Hash with :base_name and :type. Only the first summary seen for a
# given base name is used. The type is taken from the summary attributes
# when they give a valid type, otherwise guessed from the base name, and
# falls back to "mzML".
def find_runs()
  xpath = "//#{@pepxml_ns_prefix}msms_run_summary"
  runs = {}

  @pepxml_doc.find(xpath, @pepxml_ns).each do |summary|
    attrs = summary.attributes
    name = attrs["base_name"]
    next if runs.key?(name)

    # Try each source of type information in order of preference.
    type = type_from_summary_attributes(attrs)
    type = type_from_base_name(name) unless is_valid_type(type)
    type = "mzML" unless is_valid_type(type) # Same guess as peptide prophet makes

    runs[name] = { :base_name => name, :type => type }
  end

  runs
end
is_valid_type(type) click to toggle source
# File lib/protk/pepxml.rb, line 130
# Tell whether +type+ names one of the supported raw data formats.
#
# type:: candidate type name; compared case-insensitively against
#        "mgf", "mzML" and "mzXML".
#
# Returns true only when the whole value is one of those names, false
# otherwise (including for nil and other non-string values).
def is_valid_type(type)
  case type
  # \A and \z anchor the entire string; ^ and $ anchor lines, which let a
  # multi-line value such as "mgf\nraw" through.
  when /\A(?:mgf|mzML|mzXML)\z/i
    true
  else
    false
  end
end
type_from_base_name(basename) click to toggle source
# File lib/protk/pepxml.rb, line 102
# Guess the raw data type from a run's base name.
#
# A common error is for tools to include the file extension in the
# base_name attribute; this is exploited to guess the type.
#
# basename:: the base_name value to inspect (nil is tolerated).
#
# Returns "mgf", "mzML" or "mzXML" when +basename+ ends with the matching
# ".ext" suffix (case-sensitive), otherwise "".
def type_from_base_name(basename)
  case basename
  # The dot is escaped so that only a real extension matches; an unescaped
  # "." matches any character and would accept names like "run_mgf".
  when /\.mgf\z/
    "mgf"
  when /\.mzML\z/
    "mzML"
  when /\.mzXML\z/
    "mzXML"
  else
    ""
  end
end
type_from_summary_attributes(atts) click to toggle source
# File lib/protk/pepxml.rb, line 119
# Pick the raw data type out of a run summary's attributes.
#
# atts:: attribute collection indexable by name.
#
# Returns the value of "raw_data_type" when it is a valid type, else the
# value of "raw_data" when that is a valid type, else "".
def type_from_summary_attributes(atts)
  ["raw_data_type", "raw_data"].each do |key|
    candidate = atts[key]
    return candidate if is_valid_type(candidate)
  end

  ""
end