class TandemSearchTool
Attributes
default_data_path[R]
defaults_path[R]
supported_xtandem_keys[R]
taxonomy_path[R]
Public Class Methods
new()
click to toggle source
Calls superclass method
SearchTool::new
# File lib/protk/tandem_search_tool.rb, line 24
#
# Sets up the tool: hands the list of shared option names to the superclass,
# records how this tool's option names map onto X!Tandem parameter labels,
# stores the locations of the bundled data files, and registers the options
# that only this tool provides.
def initialize
  super([
    :database,
    :explicit_output,
    :over_write,
    :enzyme,
    :modifications,
    :mass_tolerance_units,
    :mass_tolerance,
    :multi_isotope_search,
    :missed_cleavages,
    :cleavage_semi,
    :methionine_oxidation,
    :glyco,
    :acetyl_nterm,
    :threads
  ])

  # Option name => the single X!Tandem parameter label it controls.
  @xtandem_keys_with_single_multiplicity = {
    fragment_tol: "spectrum, fragment monoisotopic mass error",
    missed_cleavages: "scoring, maximum missed cleavage sites",
    cleavage_semi: "protein, cleavage semi",
    precursor_tolu: "spectrum, parent monoisotopic mass error units",
    multi_isotope_search: "spectrum, parent monoisotopic mass isotope error",
    fragment_tolu: "spectrum, fragment monoisotopic mass error units",
    acetyl_nterm: "protein, quick acetyl",
    output_spectra: "output, spectra",
    threads: "spectrum, threads",
    enzyme: "protein, cleavage site"
  }

  # The precursor tolerance option feeds two X!Tandem labels (minus and plus).
  @xtandem_keys_for_precursor_tol = {
    precursor_tol: [
      "spectrum, parent monoisotopic mass error minus",
      "spectrum, parent monoisotopic mass error plus"
    ]
  }

  # Bundled data files live in the data/ directory next to this source file.
  @default_data_path = "#{File.dirname(__FILE__)}/data/"
  @defaults_path = "#{@default_data_path}tandem_params.xml"
  @taxonomy_path = "#{@default_data_path}taxonomy_template.xml"

  @option_parser.banner = "Run an X!Tandem msms search on a set of mzML input files.\n\nUsage: tandem_search.rb [options] file1.mzML file2.mzML ..."
  @options.output_suffix = "_tandem"
  @options.enzyme = "[RK]|{P}"

  add_value_option(
    :tandem_params,
    "isb_native",
    [
      '-T',
      '--tandem-params tandem',
      'Either the full path to an xml file containing a complete set of default parameters, or one of the following (isb_native,isb_kscore,gpm). Default is isb_native'
    ]
  )
  add_boolean_option(:keep_params_files, false, ['-K', '--keep-params-files', 'Keep X!Tandem parameter files'])
  add_boolean_option(:output_spectra, false, ['--output-spectra', 'Include spectra in the output file'])
end
Public Instance Methods
params_doc(db_info,taxo_path,input_path,output_path)
click to toggle source
# File lib/protk/tandem_search_tool.rb, line 236
#
# Builds the X!Tandem parameter document for a single search by parsing the
# template at @defaults_path and passing it to generate_parameter_doc.
#
# db_info     - a FastaDatabase (or subclass) describing the search database.
# taxo_path   - forwarded to generate_parameter_doc.
# input_path  - forwarded to generate_parameter_doc.
# output_path - forwarded to generate_parameter_doc.
#
# Returns whatever generate_parameter_doc returns.
# Raises ArgumentError if db_info is not a FastaDatabase.
def params_doc(db_info,taxo_path,input_path,output_path)
  # Validate the argument before any file is read so a bad call fails fast.
  # `raise` replaces the former `throw`, which only produced an
  # "uncaught throw" error; ArgumentError is an ancestor of that error class,
  # so existing rescue clauses keep matching.
  raise ArgumentError, "Invalid input db_info must be a FastaDatabase object" unless db_info.is_a?(FastaDatabase)

  std_params = XML::Parser.file(@defaults_path).parse
  generate_parameter_doc(std_params, output_path, input_path, db_info, taxo_path)
end
taxonomy_doc(db_info)
click to toggle source
# File lib/protk/tandem_search_tool.rb, line 215
#
# Produces a taxonomy document for the given database by parsing the template
# at @taxonomy_path and filling in its single taxon label and database URL.
#
# db_info - a FastaDatabase (or subclass); its name becomes the taxon label
#           and its path becomes the file URL.
#
# Returns the modified, parsed taxonomy document.
# Raises ArgumentError if db_info is not a FastaDatabase, or if the template
# does not contain exactly one taxon element and exactly one file element.
def taxonomy_doc(db_info)
  # `raise` replaces the former `throw`, which only produced an
  # "uncaught throw" error; ArgumentError is an ancestor of that error class,
  # so existing rescue clauses keep matching.
  raise ArgumentError, "Invalid input db_info must be a FastaDatabase object" unless db_info.is_a?(FastaDatabase)

  # Parse taxonomy template file
  taxo_doc = XML::Parser.file(@taxonomy_path).parse

  taxon_label = taxo_doc.find('/bioml/taxon')
  raise ArgumentError, "Exactly one taxon label is required in the taxonomy_template file" unless taxon_label.length == 1
  taxon_label[0].attributes['label'] = db_info.name

  db_file = taxo_doc.find('/bioml/taxon/file')
  raise ArgumentError, "Exactly one database file is required in the taxonomy_template file" unless db_file.length == 1
  db_file[0].attributes['URL'] = db_info.path

  taxo_doc
end
Private Instance Methods
append_option(std_params, tandem_key, value)
click to toggle source
# File lib/protk/tandem_search_tool.rb, line 81
#
# Adds a value for an X!Tandem input parameter to the parameter document.
#
# If the document has no input note labelled tandem_key, a new note is
# created under /bioml with the value as its content. If exactly one exists,
# its content is replaced by append_string(existing_content, value);
# append_string is defined outside this block.
#
# std_params - parsed parameter document (responds to find).
# tandem_key - X!Tandem parameter label.
# value      - value to store or append.
#
# Raises ArgumentError if more than one matching note exists.
def append_option(std_params, tandem_key, value)
  notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")

  if notes.length == 0
    node = XML::Node.new('note')
    node["type"] = "input"
    node["label"] = tandem_key
    node.content = value
    std_params.find('/bioml')[0] << node
  else
    # `raise` replaces the former `throw`, which only produced an
    # "uncaught throw" error; ArgumentError is an ancestor of that error
    # class, so existing rescue clauses keep matching.
    raise ArgumentError, "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length == 1
    notes[0].content = append_string(notes[0].content, value)
  end
end
generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
click to toggle source
# File lib/protk/tandem_search_tool.rb, line 114
#
# Fills in a parsed X!Tandem parameter document with the settings for one
# search and returns it.
#
# std_params  - parsed parameter document; modified in place.
# output_path - written to the "output, path" note.
# input_path  - written to the "spectrum, path" note.
# db_info     - object whose name is written to the "protein, taxon" note.
# taxo_path   - written to the "list path, taxonomy information" note.
#
# Returns std_params.
# Raises ArgumentError if the document does not contain exactly one
# "list path, default parameters" input note.
def generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
  #
  # The TandemSearchTool class has a special defaults system
  # Defaults are read from (a) The commandline (b) A defaults file (c) commandline defaults.
  # The ideal priority order is a -> b -> c
  #
  # In order to support this we need to read the defaults file and check options defined there
  # against those defined on the commandline
  #
  # In addition, we support some default parameter files built-in to protk. These are treated the same
  # but are specified if the user provides a keyword rather than a path
  #
  default_params_notes = std_params.find('/bioml/note[@type="input" and @label="list path, default parameters"]')
  # `raise` replaces the former `throw`, which only produced an
  # "uncaught throw" error; ArgumentError is an ancestor of that error class,
  # so existing rescue clauses keep matching.
  raise ArgumentError, "Exactly one list path, default parameters note is required in the parameter file" unless default_params_notes.length == 1

  defaults_note = default_params_notes[0]
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  if File.exist?(self.tandem_params)
    # The user supplied a path to their own defaults file.
    defaults_note.content = self.tandem_params.to_s
  else
    # Otherwise treat the value as a keyword naming a bundled defaults file.
    defaults_note.content = "#{@default_data_path}tandem_#{self.tandem_params}_defaults.xml"
  end

  keys_in_params_file = tandem_keys_in_params_file(defaults_note.content)
  keys_on_commandline = @options_defined_by_user.keys

  # An option is written when the defaults file does not define its X!Tandem
  # key, or when the user set the option explicitly on the command line.
  should_write = lambda do |option_key, xtandem_key|
    !keys_in_params_file.include?(xtandem_key) || keys_on_commandline.include?(option_key)
  end

  # Set the input and output paths
  #
  set_option(std_params, "spectrum, path", input_path)
  set_option(std_params, "output, path", output_path)

  # Taxonomy and Database
  #
  set_option(std_params, "list path, taxonomy information", taxo_path)
  set_option(std_params, "protein, taxon", db_info.name)

  # Simple options (unique with a 1:1 mapping to parameters from this tool)
  #
  @xtandem_keys_with_single_multiplicity.each_pair do |commandline_option_key, xtandem_key|
    next unless should_write.call(commandline_option_key, xtandem_key)

    opt_val = self.send(commandline_option_key)
    # Booleans are written as "yes"/"no".
    if opt_val.is_a?(TrueClass) || opt_val.is_a?(FalseClass)
      opt_val = opt_val ? "yes" : "no"
    end
    append_option(std_params, xtandem_key, GalaxyUtil.decode_galaxy_string!(opt_val.to_s))
  end

  # Precursor mass tolerance is a special case as it requires two xtandem options
  # Each of the two options receives half of the precursor_tol value.
  #
  @xtandem_keys_for_precursor_tol.each_pair do |commandline_option_key, xtandem_keys|
    xtandem_keys.each do |xtandem_key|
      next unless should_write.call(commandline_option_key, xtandem_key)

      append_option(std_params, xtandem_key, (self.precursor_tol.to_f * 0.5).to_s)
    end
  end

  # Per residue Fixed and Variable Modifications
  #
  # These can be added using a variety of methods in xtandem
  #
  # residue, potential modification mass
  # residue, modification mass
  # residue, potential modification motif
  #
  # We support these primarily via the var_mods and fix_mods commandline params
  # Modification masses and/or motifs can be entered via these arguments
  #
  variable_entries = self.var_mods.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }
  variable_entries = variable_entries.map { |mod| GalaxyUtil.decode_galaxy_string!(mod) }

  # var_mods allows motifs as well as standard mods. These go in separate arrays
  variable_motifs, variable_masses = variable_entries.partition { |mod| mod.xtandem_modification_motif? }

  fixed_masses = self.fix_mods.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }
  fixed_masses = fixed_masses.map { |mod| GalaxyUtil.decode_galaxy_string!(mod) }

  # We also support the glyco and methionine_oxidation shortcut options.
  # Add these here. No check is made for duplication
  #
  variable_motifs << "0.998@N!{P}[ST]" if self.glyco
  variable_masses << "15.994915@M" if self.methionine_oxidation

  append_option(std_params, "residue, modification mass", fixed_masses.join(",")) unless fixed_masses.empty?
  append_option(std_params, "residue, potential modification mass", variable_masses.join(",")) unless variable_masses.empty?
  append_option(std_params, "residue, potential modification motif", variable_motifs.join(",")) unless variable_motifs.empty?

  std_params
end
set_option(std_params, tandem_key, value)
click to toggle source
# File lib/protk/tandem_search_tool.rb, line 75
#
# Overwrites the content of the single input note labelled tandem_key in the
# parameter document with value.to_s.
#
# std_params - parsed parameter document (responds to find).
# tandem_key - X!Tandem parameter label.
# value      - new value; converted with to_s.
#
# Raises ArgumentError unless exactly one matching note exists.
def set_option(std_params, tandem_key, value)
  notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
  # `raise` replaces the former `throw`, which only produced an
  # "uncaught throw" error; ArgumentError is an ancestor of that error class,
  # so existing rescue clauses keep matching.
  raise ArgumentError, "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length == 1

  notes[0].content = value.to_s
end
tandem_keys_in_params_file(default_params_path)
click to toggle source
# File lib/protk/tandem_search_tool.rb, line 95
#
# Lists the labels of every input note found in a parameter file.
#
# default_params_path - path of the XML parameter file to parse.
#
# Returns an Array with the 'label' attribute of each /bioml/note element
# whose type is "input", in document order.
def tandem_keys_in_params_file(default_params_path)
  default_params = XML::Parser.file(default_params_path).parse
  # map replaces the former hand-rolled accumulator loop.
  default_params.find('/bioml/note[@type="input"]').map { |node| node.attributes['label'] }
end
taxon_from_taxonomy_file(taxo_path)
click to toggle source
# File lib/protk/tandem_search_tool.rb, line 106
#
# Reads the taxon label out of a taxonomy file.
#
# taxo_path - path of the XML taxonomy file to parse.
#
# Returns the 'label' attribute of the file's single /bioml/taxon element.
# Raises ArgumentError unless the file contains exactly one taxon element.
def taxon_from_taxonomy_file(taxo_path)
  taxo_doc = XML::Parser.file(taxo_path).parse
  taxon_nodes = taxo_doc.find('/bioml/taxon')
  # `raise` replaces the former `throw`, which only produced an
  # "uncaught throw" error; ArgumentError is an ancestor of that error class,
  # so existing rescue clauses keep matching.
  raise ArgumentError, "Exactly one taxon entry allowed in taxonomy file but found #{taxon_nodes.length}" unless taxon_nodes.length == 1

  taxon_nodes[0].attributes['label']
end