class Rubabel::Molecule
Constants
- DEFAULT_FINGERPRINT
- DEFAULT_ID_TYPE
- DEFAULT_IN_TYPE
- DEFAULT_OUT_TYPE
- ID_TYPES
- ID_TYPE_KEYS
Attributes
the OpenBabel::OBMol object
Public Class Methods
# File lib/rubabel/molecule.rb, line 141
# Wraps a fresh OpenBabel::OBMol in a new molecule, adds every given atom,
# then every given bond, and returns the new molecule.
# NOTE(review): relies on #add_atom and #add_bond (no bang); neither is
# defined in this section -- confirm they exist elsewhere.
def from_atoms_and_bonds(atoms=[], bonds=[])
  self.new(OpenBabel::OBMol.new).tap do |molecule|
    atoms.each {|atom| molecule.add_atom(atom) }
    bonds.each {|bond| molecule.add_bond(bond) }
  end
end
# File lib/rubabel/molecule.rb, line 97
# Reads the first record produced by Rubabel.read_first_obmol for +file+
# (optionally forcing +type+) and wraps its first element in a
# Rubabel::Molecule.
def from_file(file, type=nil)
  first_record = Rubabel.read_first_obmol(file, type).first
  # only the leading element (the OBMol) is needed; the rest is ignored
  obmol, _obconv, _not_at_end = first_record
  Rubabel::Molecule.new(obmol)
end
requires an internet connection
# File lib/rubabel/molecule.rb, line 124
# Builds a molecule from an external identifier by querying a web service.
#
# id::   the identifier string
# type:: :inchikey (resolved to an InChI through chemspider) or :lmid
#        (SDF record fetched from lipidmaps)
#
# Returns whatever from_string returns for the retrieved record, or nil
# when +type+ is not one of the handled types.  Raises ArgumentError when
# the chemspider reply does not start with "InChI=".
def from_id(id, type=DEFAULT_ID_TYPE)
  # URI::encode was removed in Ruby 3.0; encode_www_form_component is the
  # supported way to escape a query-string value.
  escaped_id = URI.encode_www_form_component(id)
  case type
  when :inchikey
    url = "http://www.chemspider.com/InChI.asmx/InChIKeyToInChI?inchi_key=" + escaped_id
    doc_string = retrieve_info_from_url(url)
    doc = REXML::Document.new( doc_string )
    inchi_string = doc.root.children.first.to_s
    raise(ArgumentError, "did not retrieve a valid inchi string") unless inchi_string[/\AInChI=/]
    from_string(inchi_string, :inchi)
  when :lmid # lipidmaps id
    url = "http://www.lipidmaps.org/data/LMSDRecord.php?OutputType=SDF&Mode=File&LMID=" + escaped_id
    doc_string = retrieve_info_from_url(url)
    from_string(doc_string, :sdf)
  end
end
# File lib/rubabel/molecule.rb, line 102
# Parses +string+ in the given OpenBabel input format and returns a new
# molecule wrapping the resulting OBMol.
#
# For type :inchi the "InChI=" prefix is added when missing.  The prefix is
# applied to a new string, so the caller's argument is never modified (and
# frozen strings are accepted).
#
# Raises ArgumentError when the format is rejected by OBConversion or the
# string cannot be read.
def from_string(string, type=DEFAULT_IN_TYPE)
  if type == :inchi && string !~ /\AInChI=/
    string = "InChI=" + string
  end
  obmol = OpenBabel::OBMol.new
  obconv = OpenBabel::OBConversion.new
  obconv.set_in_format(type.to_s) || raise(ArgumentError, "invalid format #{type}")
  obconv.read_string(obmol, string) || raise(ArgumentError, "invalid string" )
  self.new(obmol)
end
# File lib/rubabel/molecule.rb, line 215
# Stores the given OBMol in @ob; when nil is given (the default) a fresh
# OpenBabel::OBMol is created instead.
def initialize(obmol=nil)
  @ob =
    if obmol.nil?
      OpenBabel::OBMol.new
    else
      obmol
    end
end
# File lib/rubabel/molecule.rb, line 113
# Opens +url+ and returns everything read from it.
#
# On any StandardError a hint is written to stderr and the original
# exception is re-raised.
def retrieve_info_from_url(url)
  # Kernel#open stopped opening URLs in Ruby 3.0; prefer URI.open where
  # open-uri provides it and fall back to Kernel#open on older rubies.
  if URI.respond_to?(:open)
    URI.open(url) {|io| io.read }
  else
    open(url) {|io| io.read }
  end
rescue => e
  warn "Some kind of internet connectivity error. Check your connection!"
  raise e
end
# File lib/rubabel/molecule.rb, line 93
# Passes the fingerprints of +mol1+ and +mol2+ (both of fingerprint +type+)
# to OpenBabel::OBFingerprint.tanimoto and returns its result.
def tanimoto(mol1, mol2, type=DEFAULT_FINGERPRINT)
  first_print = mol1.ob_fingerprint(type)
  second_print = mol2.ob_fingerprint(type)
  OpenBabel::OBFingerprint.tanimoto(first_print, second_print)
end
Public Instance Methods
defined as whether the csmiles strings are identical. This incorporates more information than the FP2 fingerprint, for instance (try changing the charge and see how it does not influence the fingerprint). Obviously, things like title or data will not be evaluated with ==. See equal? if you are looking for identity. More stringent comparisons will have to be done by hand!
# File lib/rubabel/molecule.rb, line 385
# True when +other+ responds to #csmiles and its csmiles string equals
# this molecule's.
def ==(other)
  return false unless other.respond_to?(:csmiles)
  csmiles == other.csmiles
end
retrieves the atom by index (accepts everything an array would)
# File lib/rubabel/molecule.rb, line 184
# Indexes into the array returned by #atoms, forwarding all arguments to
# Array#[].
def [](*args)
  atom_list = atoms
  atom_list[*args]
end
returns the atom passed in or that was created. arg is a pre-existing atom, an atomic number or an element symbol (e.g. :c). default is to add carbon.
# File lib/rubabel/molecule.rb, line 174
# Associates +arg+ with the molecule (see #associate_atom!) and bonds it,
# with +bond_order+, to +attach_to+ -- or to the last existing atom when
# +attach_to+ is not given.  No bond is made when there is nothing to
# attach to.  Returns the associated atom.
def add_atom!(arg=6, bond_order=1, attach_to=nil)
  # the anchor must be resolved before the new atom joins the molecule
  anchor = attach_to || atoms.last
  new_atom = associate_atom!(arg)
  add_bond!(anchor, new_atom, bond_order) if anchor
  new_atom
end
takes a pair of Rubabel::Atom
objects and adds a bond to the molecule. Returns whether the bond creation was successful.
# File lib/rubabel/molecule.rb, line 511
# Asks the underlying OBMol to add a bond of the given +order+ between the
# indices of +atom1+ and +atom2+; returns what OBMol#add_bond returns.
def add_bond!(atom1, atom2, order=1)
  begin_idx = atom1.idx
  end_idx = atom2.idx
  @ob.add_bond(begin_idx, end_idx, order)
end
returns self. Corrects for ph if ph is not nil. NOTE: the reversal of arguments from the OpenBabel
api.
# File lib/rubabel/molecule.rb, line 289
# Adds hydrogens through OBMol#add_hydrogens.  With a nil +ph+ only
# +polaronly+ is passed; otherwise (polaronly, true, ph) is passed.
# Returns self.
def add_h!(ph=nil, polaronly=false)
  hydrogen_args = ph.nil? ? [polaronly] : [polaronly, true, ph]
  @ob.add_hydrogens(*hydrogen_args)
  self
end
only adds polar hydrogens. returns self
# File lib/rubabel/molecule.rb, line 299
# Calls OBMol#add_polar_hydrogens on the wrapped molecule; returns self.
def add_polar_h!
  tap { @ob.add_polar_hydrogens }
end
arg may be a Fixnum, a Symbol (Elemental symbol that is a Symbol), or a Rubabel::Atom
. Returns the newly associated/created atom.
# File lib/rubabel/molecule.rb, line 151
# Associates an atom with the molecule without bonding it.
#
# A Rubabel::Atom is added as-is and returned.  Otherwise a new OB atom is
# created: a Symbol is looked up in Rubabel::ELEMENT_TO_NUM and flagged
# aromatic when it is not written in capitalized form; any other value is
# used directly as the atomic number.  Returns the (new) Rubabel::Atom.
def associate_atom!(arg)
  if arg.is_a?(Rubabel::Atom)
    @ob.add_atom(arg.ob)
    return arg
  end

  if arg.is_a?(Symbol)
    symbol_text = arg.to_s
    atomic_num = Rubabel::ELEMENT_TO_NUM[arg]
    aromatic = (symbol_text.capitalize != symbol_text)
  else
    atomic_num = arg
    aromatic = false
  end

  obatom = @ob.new_atom
  obatom.set_atomic_num(atomic_num)
  obatom.set_aromatic if aromatic
  Rubabel::Atom.new(obatom)
end
gets the atom by id
# File lib/rubabel/molecule.rb, line 433
# Looks the atom up with OBMol#get_atom_by_id and returns its #upcast.
def atom(id)
  obatom = @ob.get_atom_by_id(id)
  obatom.upcast
end
returns the array of atoms. Consider using each
# File lib/rubabel/molecule.rb, line 438
# Collects everything yielded by #each_atom into an Array.
def atoms
  each_atom.to_a
end
gets the bond by id
# File lib/rubabel/molecule.rb, line 423
# Looks the bond up with OBMol#get_bond_by_id and returns its #upcast.
def bond(id)
  obbond = @ob.get_bond_by_id(id)
  obbond.upcast
end
returns the array of bonds. Consider using each_bond
# File lib/rubabel/molecule.rb, line 428
# Collects everything yielded by #each_bond into an Array.
def bonds
  each_bond.to_a
end
centers the molecule (deals with the atomic coordinate systems for 2D or 3D molecules). returns self.
# File lib/rubabel/molecule.rb, line 685
# Calls OBMol#center on the wrapped molecule; returns self.
def center!
  tap { @ob.center }
end
# File lib/rubabel/molecule.rb, line 196
# Returns the value of OBMol#get_total_charge.
def charge
  @ob.get_total_charge
end
# File lib/rubabel/molecule.rb, line 197
# Hands +v+ to OBMol#set_total_charge.
def charge=(v)
  @ob.set_total_charge(v)
end
returns self
# File lib/rubabel/molecule.rb, line 678
# Calls OBMol#convert_dative_bonds on the wrapped molecule; returns self.
def convert_dative_bonds!
  tap { @ob.convert_dative_bonds }
end
returns self. If ph is nil, then neutral!
is called
# File lib/rubabel/molecule.rb, line 305
# With a nil +ph+ delegates to #neutral!; otherwise calls
# OBMol#correct_for_ph with +ph+.  Returns self either way.
def correct_for_ph!(ph=7.4)
  if ph.nil?
    neutral!
  else
    @ob.correct_for_ph(ph)
  end
  self
end
returns just the smiles string (not the id)
# File lib/rubabel/molecule.rb, line 351
# Shorthand for #to_s with the :can output type.
def csmiles
  output_type = :can
  to_s(output_type)
end
returns a Rubabel::MoleculeData
hash
# File lib/rubabel/molecule.rb, line 563
# Returns a new Rubabel::MoleculeData built around the wrapped OBMol.
def data
  Rubabel::MoleculeData.new(@ob)
end
obj is an atom or bond
# File lib/rubabel/molecule.rb, line 471
# Dispatches on the class of +obj+: a Rubabel::Bond goes to #delete_bond,
# a Rubabel::Atom to #delete_atom.  Anything else raises ArgumentError.
def delete(obj)
  if obj.is_a?(Rubabel::Bond)
    delete_bond(obj)
  elsif obj.is_a?(Rubabel::Atom)
    delete_atom(obj)
  else
    raise(ArgumentError, "don't know how to delete objects of type: #{obj.class}")
  end
end
yields self after deleting the specified bonds. When the block is closed the bonds are restored. Returns whatever is returned from the block.
# File lib/rubabel/molecule.rb, line 518
# Deletes the given bonds, calls the block with self, then adds the bonds
# back.  Returns the block's value.
#
# Raises RuntimeError when OBMol#delete_bond reports failure for a bond.
# The bonds that were actually removed are re-added in an +ensure+, so the
# molecule is restored even when the block (or a later deletion) raises.
def delete_and_restore_bonds(*bonds, &block)
  removed = []
  begin
    bonds.each do |bond|
      unless @ob.delete_bond(bond.ob, false)
        raise "#{bond.inspect} not deleted!"
      end
      removed << bond
    end
    block.call(self)
  ensure
    # only re-add what was really taken out
    removed.each {|bond| @ob.add_bond(bond.ob) }
  end
end
# File lib/rubabel/molecule.rb, line 188
# Passes the atom's underlying OB object to OBMol#delete_atom, with false
# as the second argument.
def delete_atom(atom)
  obatom = atom.ob
  @ob.delete_atom(obatom, false)
end
if given a bond, deletes it (doesn’t garbage collect). If given two atoms, deletes the bond between them.
# File lib/rubabel/molecule.rb, line 484
# With one argument (a bond) deletes that bond; with two arguments (atoms)
# deletes the bond obtained from first.get_bond(second).  Any other
# argument count does nothing and returns nil.
def delete_bond(*args)
  first, second = args
  if args.size == 1
    @ob.delete_bond(first.ob, false)
  elsif args.size == 2
    @ob.delete_bond(first.get_bond(second).ob, false)
  end
end
returns self
# File lib/rubabel/molecule.rb, line 338
# Calls OBMol#delete_hydrogens on the wrapped molecule; returns self.
def delete_hydrogens!
  tap { @ob.delete_hydrogens }
end
# File lib/rubabel/molecule.rb, line 442
# Returns the value of OBMol#get_dimension.
def dim
  dimension = @ob.get_dimension
  dimension
end
ensures that hydrogens are added before an operation, but returns the molecule to the original hydrogen or no hydrogen state when finished. returns whatever was returned by the block.
# File lib/rubabel/molecule.rb, line 271
# Runs the block with hydrogens present.  When the OBMol does not report
# hydrogens as added, they are added first (via #add_h! with +ph+ and
# +polaronly+) and deleted again afterwards.  Returns the block's value.
#
# The clean-up runs in an +ensure+ so the original hydrogen state is
# restored even when the block raises.
def do_with_hydrogens(ph=nil, polaronly=false, &block)
  hydr_added = @ob.has_hydrogens_added
  add_h!(ph, polaronly) unless hydr_added
  begin
    block.call
  ensure
    @ob.delete_hydrogens unless hydr_added
  end
end
# File lib/rubabel/molecule.rb, line 279
# Runs the block with hydrogens removed.  When the OBMol reports hydrogens
# as added, they are deleted first and added back afterwards (via #add_h!
# with +ph+ and +polaronly+).  Returns the block's value.
#
# The clean-up runs in an +ensure+ so the hydrogens are put back even when
# the block raises.
def do_without_hydrogens(ph=nil, polaronly=false, &block)
  hydr_added = @ob.has_hydrogens_added
  @ob.delete_hydrogens if hydr_added
  begin
    block.call
  ensure
    add_h!(ph, polaronly) if hydr_added
  end
end
iterates over the molecule’s Rubabel::Atom
objects
# File lib/rubabel/molecule.rb, line 390
# Yields the #upcast of every atom produced by the OBMol atom iterator.
# Without a block returns an Enumerator.  With a block returns self, which
# matches #each_bond.
def each_atom(&block)
  # could use the C++ iterator in the future
  return enum_for(__method__) unless block
  iter = @ob.begin_atoms
  atom = @ob.begin_atom(iter)
  while atom
    block.call atom.upcast
    atom = @ob.next_atom(iter)
  end
  self
end
iterates over the molecule’s Rubabel::Bond
objects
# File lib/rubabel/molecule.rb, line 403
# Yields the #upcast of every bond produced by the OBMol bond iterator.
# Without a block returns an Enumerator; with a block returns self.
def each_bond(&block)
  # could use the C++ iterator in the future
  return enum_for(__method__) unless block

  cursor = @ob.begin_bonds
  current = @ob.begin_bond(cursor)
  while current
    block.call(current.upcast)
    current = @ob.next_bond(cursor)
  end
  self
end
# File lib/rubabel/molecule.rb, line 542
# Yields the #upcast of every item returned by OBMol#separate.  Without a
# block returns an Enumerator.
def each_fragment(&block)
  return enum_for(__method__) unless block
  @ob.separate.each {|fragment| block.call(fragment.upcast) }
end
yields atom arrays matching the pattern. returns an enumerator if no block is given
# File lib/rubabel/molecule.rb, line 233
# For every index set returned by #smarts_indices, yields the array of
# atoms found at those positions in #atoms.  Without a block returns an
# Enumerator for the same arguments.
def each_match(smarts_or_string, uniq=true, &block)
  return enum_for(__method__, smarts_or_string, uniq) unless block

  # take the atom array once; every match indexes into the same array
  atom_list = atoms
  smarts_indices(smarts_or_string, uniq).each do |indices|
    block.call(atom_list.values_at(*indices))
  end
end
checks to see if the molecules are the same OBMol object underneath by modifying one and seeing if the other changes. This is because openbabel routinely creates new objects that point to the same underlying data store, so even checking for OBMol equivalency is not enough.
# File lib/rubabel/molecule.rb, line 360
# Identity test for the underlying data: temporarily changes this
# molecule's title and checks whether +other+'s title changes with it.
#
# Returns false right away when +other+ is not of this class or the titles
# already differ.  The original title is always written back afterwards
# (the earlier version computed the restored string but never assigned it,
# leaving the object id appended to the title).
def equal?(other)
  return false unless other.is_a?(self.class)
  original_title = title
  return false unless original_title == other.title
  begin
    # a marker unique to this Ruby object
    self.title = original_title + object_id.to_s
    title == other.title
  ensure
    self.title = original_title
  end
end
# File lib/rubabel/molecule.rb, line 204
# Returns the value of OBMol#get_exact_mass.
def exact_mass
  @ob.get_exact_mass
end
returns a string representation of the molecular formula. Not sensitive to add_h!
# File lib/rubabel/molecule.rb, line 213
# Returns the value of OBMol#get_formula.
def formula
  @ob.get_formula
end
obconv.add_option("u",OpenBabel::OBConversion::OUTOPTIONS) self
end
# File lib/rubabel/molecule.rb, line 723
# Starts an OpenBabel::OBMolAtomBFSIter at every atom, records
# (current_depth - 1) after each advance that still dereferences to an
# atom, and returns the largest recorded value (nil when nothing was
# recorded).
def graph_diameter
  depths = []
  atoms.each do |start_atom|
    bfs = OpenBabel::OBMolAtomBFSIter.new(ob, start_atom.idx)
    # the iterator is advanced before it is read
    depths << (bfs.current_depth - 1) while bfs.inc.deref
  end
  depths.max
end
returns self
# File lib/rubabel/molecule.rb, line 703
# Registers the general option "s" with the value "<substructure> <color>"
# on a temporary OBConversion and applies that conversion's general
# options to the OBMol through do_transformations.  Returns self.
def highlight_substructure!(substructure, color='red')
  general = OpenBabel::OBConversion::GENOPTIONS
  converter = OpenBabel::OBConversion.new
  converter.add_option("s", general, "#{substructure} #{color}")
  ob.do_transformations(converter.get_options(general), converter)
  self
end
are there hydrogens added yet
# File lib/rubabel/molecule.rb, line 263
# Returns the value of OBMol#has_hydrogens_added.
def hydrogens_added?
  added = @ob.has_hydrogens_added
  added
end
creates a deep copy of the molecule (even the atoms are duplicated)
# File lib/rubabel/molecule.rb, line 416
# Copy hook: after the default behaviour, replaces @ob with a new OBMol
# constructed from the source's OBMol.  Returns self.
def initialize_copy(source)
  super
  duplicate = OpenBabel::OBMol.new(source.ob)
  @ob = duplicate
  self
end
# File lib/rubabel/molecule.rb, line 673
# Returns "#<Mol ...>" with the result of #to_s inside.
def inspect
  description = to_s
  "#<Mol #{description}>"
end
returns self
# File lib/rubabel/molecule.rb, line 691
# Calls OBMol#kekulize on the wrapped molecule; returns self.
def kekulize!
  tap { @ob.kekulize }
end
adds hydrogens if necessary. Performs only steepest descent optimization (no rotors optimized) returns self
# File lib/rubabel/molecule.rb, line 594
# Adds hydrogens when they are not reported as added.  A molecule whose
# #dim is not 3 is handed to #make_3d!; otherwise the named force field is
# set up on the OBMol, steepest descent is run for +steps+, and the
# coordinates are written back.  Returns self.
#
# Raises OpenBabelUnableToSetupForceFieldError when force field setup
# fails.
def local_optimize!(forcefield=DEFAULT_FORCEFIELD, steps=500)
  add_h! unless hydrogens_added?

  unless dim == 3
    make_3d!(forcefield, steps)
    return self
  end

  field = Rubabel.force_field(forcefield.to_s)
  raise(OpenBabelUnableToSetupForceFieldError) unless field.setup(@ob)
  field.steepest_descent(steps) # is the default termination count 1.0e-4 (used in obgen?)
  field.update_coordinates(@ob)
  self
end
does a bit of basic local optimization unless steps is set to nil returns self
# File lib/rubabel/molecule.rb, line 616
# Runs BUILDER.build on the OBMol, adds hydrogens (add_hydrogens(false,
# true)) when they are not reported as added, and -- unless +steps+ is nil
# or false -- finishes with #local_optimize!.  Returns self.
def make_3d!(forcefield=DEFAULT_FORCEFIELD, steps=50)
  BUILDER.build(@ob)
  unless hydrogens_added?
    @ob.add_hydrogens(false, true)
  end
  if steps
    local_optimize!(forcefield, steps)
  end
  self
end
returns the exact_mass
corrected for charge gain/loss
# File lib/rubabel/molecule.rb, line 207
# Exact mass minus (total charge * Rubabel::MASS_E).
def mass
  exact = @ob.get_exact_mass
  charge_correction = @ob.get_total_charge * Rubabel::MASS_E
  exact - charge_correction
end
returns an array of matching atom sets. Consider using each_match.
# File lib/rubabel/molecule.rb, line 242
# Collects everything #each_match yields for the same arguments into an
# Array.
def matches(smarts_or_string, uniq=true)
  each_match(smarts_or_string, uniq).to_a
end
# File lib/rubabel/molecule.rb, line 246
# True when #smarts_indices returns at least one index set.
def matches?(smarts_or_string)
  # TODO: probably a more efficient way to do this using API
  !smarts_indices(smarts_or_string).empty?
end
# File lib/rubabel/molecule.rb, line 201
# Returns the value of OBMol#get_mol_wt.
def mol_wt
  @ob.get_mol_wt
end
Simple method to coerce the molecule into a neutral charge state. It does this by removing any charge from each atom and then removing the hydrogens (which can then be added back by the user and will be added back with proper valence). If the molecule had hydrogens added, it will return the molecule with hydrogens added. Returns self.
# File lib/rubabel/molecule.rb, line 316
# Sets the charge of every charged atom to 0, removes hydrogens, and adds
# them back when they had been present beforehand.  Returns self.
# NOTE(review): #h_added? and #remove_h! are not defined in this section;
# presumably aliases of #hydrogens_added? and #delete_hydrogens! -- confirm.
def neutral!
  restore_hydrogens = h_added?
  atoms.each do |atom|
    next if atom.charge == 0
    atom.charge = 0
  end
  remove_h!
  add_h! if restore_hydrogens
  self
end
creates a new (as yet unspecified) bond associated with the molecule and gives it a unique id
# File lib/rubabel/molecule.rb, line 505
# Asks the OBMol for a new bond and returns its #upcast.
def new_bond
  obbond = @ob.new_bond
  obbond.upcast
end
sensitive to add_h!
# File lib/rubabel/molecule.rb, line 568
# Returns OBMol#num_atoms.  When +count_implied_hydrogens+ is true, the
# implicit_hydrogen_count of every atom iterated by this molecule is added
# on top.
def num_atoms(count_implied_hydrogens=false)
  explicit = @ob.num_atoms
  return explicit unless count_implied_hydrogens

  implied = inject(0) {|total, atom| total + atom.ob.implicit_hydrogen_count }
  explicit + implied
end
# File lib/rubabel/molecule.rb, line 575
# Returns the value of OBMol#num_bonds.
def num_bonds
  @ob.num_bonds
end
# File lib/rubabel/molecule.rb, line 576
# Returns the value of OBMol#num_hvy_atoms.
def num_hvy_atoms
  @ob.num_hvy_atoms
end
# File lib/rubabel/molecule.rb, line 577
# Returns the value of OBMol#num_residues.
def num_residues
  @ob.num_residues
end
# File lib/rubabel/molecule.rb, line 578
# Returns the value of OBMol#num_rotors.
def num_rotors
  @ob.num_rotors
end
returns a std::vector<unsigned int> that can be passed directly into the OBFingerprint.tanimoto method
# File lib/rubabel/molecule.rb, line 463
# Computes the fingerprint of the given +type+ into a new
# OpenBabel::VectorUnsignedInt and returns that vector.
#
# Raises ArgumentError when OBFingerprint.find_fingerprint finds nothing
# for +type+, and RuntimeError when get_fingerprint reports failure.  The
# failure message uses #inspect; the previous code interpolated an
# undefined name (+mol+), which raised NameError instead.
def ob_fingerprint(type=DEFAULT_FINGERPRINT)
  fprinter = OpenBabel::OBFingerprint.find_fingerprint(type) || raise(ArgumentError, "fingerprint type not found")
  fp = OpenBabel::VectorUnsignedInt.new
  fprinter.get_fingerprint(@ob, fp) || raise("failed to get fingerprint for #{inspect}")
  fp
end
returns an array of OpenBabel::OBRing objects.
# File lib/rubabel/molecule.rb, line 252
# Converts the result of OBMol#get_sssr to an Array.
def ob_sssr
  rings = @ob.get_sssr
  rings.to_a
end
yields the type of object. Expects the block to yield the image string.
# File lib/rubabel/molecule.rb, line 625
# Adapts the requested format and options, yields them to the block (which
# must return the image/text blob), and post-processes the result.
#
# * 'png' is rendered by asking the block for 'svg' and converting the blob
#   with MiniMagick; a :size without an "x" is expanded to "NxN".
# * For any other format a "WxH" string :size is reduced to the width
#   (with a warning).
#
# The adjustments are made on a copy, so the caller's +out_options+ hash is
# never modified.  (The earlier version edited it in place, did not restore
# it when the block raised, and left a :size => nil entry behind when no
# size was given.)  Returns the (possibly converted) blob.
def png_transformer(type_s, out_options={}, &block)
  opts = out_options.dup
  size = opts[:size]
  png_output = (type_s == 'png')

  if png_output
    type_s = 'svg'
    opts[:size] = "#{size}x#{size}" if size && size.to_s !~ /x/i
  elsif size.is_a?(String) && size =~ /x/i
    warn 'can only use the width dimension for this format'
    opts[:size] = size.split(/x/i).first
  end

  image_blob = block.call(type_s, opts)

  if png_output
    st = StringIO.new
    image = MiniMagick::Image.read(image_blob, 'svg')
    image.format('png')
    # would like to resize as an svg, then output the png of proper
    # granularity...
    image.resize(opts[:size]) if opts[:size]
    image_blob = image.write(st).string
  end
  image_blob
end
returns a list of atom indices matching the patterns (corresponds to the OBSmartsPattern::GetUMapList() method if uniq==true and GetMapList method if uniq==false). Note that the original GetUMapList returns atom numbers (i.e., the index + 1). This method returns the zero indexed indices.
# File lib/rubabel/molecule.rb, line 224
# Matches the pattern (a Rubabel::Smarts, or anything Rubabel::Smarts.new
# accepts) against the OBMol and returns the match lists with every entry
# decremented by one.  +uniq+ selects get_umap_list (true) or get_map_list
# (false).
def smarts_indices(smarts_or_string, uniq=true)
  pattern =
    if smarts_or_string.is_a?(Rubabel::Smarts)
      smarts_or_string
    else
      Rubabel::Smarts.new(smarts_or_string)
    end
  pattern.ob.match(@ob)
  hits = uniq ? pattern.ob.get_umap_list : pattern.ob.get_map_list
  hits.map do |atom_numbers|
    atom_numbers.map {|number| number - 1 }
  end
end
returns just the smiles string :smi (not the id)
# File lib/rubabel/molecule.rb, line 346
# Shorthand for #to_s with the :smi output type.
def smiles
  output_type = :smi
  to_s(output_type)
end
# File lib/rubabel/molecule.rb, line 199
# Returns the value of OBMol#get_total_spin_multiplicity.
def spin
  @ob.get_total_spin_multiplicity
end
splits the molecules at the given bonds and returns the fragments. Does not alter the caller. If the molecule is already fragmented, then returns the separate fragments.
# File lib/rubabel/molecule.rb, line 532
# Returns the upcast results of OBMol#separate.  When bonds are given they
# are removed for the duration of the separation through
# #delete_and_restore_bonds.
def split(*bonds)
  return ob.separate.map(&:upcast) if bonds.empty?

  delete_and_restore_bonds(*bonds) do |mol|
    mol.ob.separate.map(&:upcast)
  end
end
returns self
# File lib/rubabel/molecule.rb, line 697
# Strips salts from the wrapped molecule and returns self.
#
# Calls OBMol#strip_salts.  The earlier code called +strip_salts!+, but
# every other OBMol call in this class uses the plain (non-bang) binding
# name, so the bang form is assumed to be a typo.
# NOTE(review): confirm against the OpenBabel Ruby bindings in use.
def strip_salts!
  @ob.strip_salts
  self
end
swaps to_move1 for to_move2 on the respective anchors returns self
# File lib/rubabel/molecule.rb, line 499
# Passes the OB indices of the four atoms, in the given order, to
# OpenBabel::OBBuilder.swap together with the OBMol.  Returns self.
def swap!(anchor1, to_move1, anchor2, to_move2)
  atom_indices = [anchor1, to_move1, anchor2, to_move2].map do |at|
    at.ob.get_idx
  end
  OpenBabel::OBBuilder.swap(@ob, *atom_indices)
  self
end
TODO: implement list of supported descriptors. (Not Yet Implemented!)
def descs end
# File lib/rubabel/molecule.rb, line 457
# Returns 0 for a nil +other+; otherwise delegates to
# Rubabel::Molecule.tanimoto with self, +other+ and +type+.
def tanimoto(other, type=DEFAULT_FINGERPRINT)
  return 0 if other.nil?
  Rubabel::Molecule.tanimoto(self, other, type)
end
attributes
# File lib/rubabel/molecule.rb, line 193
# Returns the value of OBMol#get_title.
def title
  @ob.get_title
end
# File lib/rubabel/molecule.rb, line 194
# Hands +val+ to OBMol#set_title.
def title=(val)
  @ob.set_title(val)
end
emits smiles without the trailing tab, newline, or id. Use write_string
to get the default OpenBabel
behavior (ie., tabs and newlines).
# File lib/rubabel/molecule.rb, line 551
# Returns the #write_string output for +type+.  For :smi, :smiles and :can
# only the first whitespace-delimited token is returned; other types are
# returned unchanged.
#
# When the written string contains no token at all (e.g. only a tab and a
# newline) an empty string is returned, so this method always yields a
# String for the SMILES types.
def to_s(type=DEFAULT_OUT_TYPE)
  string = write_string(type)
  case type
  when :smi, :smiles, :can
    # remove name with openbabel options in the future
    string.split(/\s+/).first.to_s
  else
    string
  end
end
If filename_or_type is a symbol, then it will return a string of that type. If filename_or_type is a string, it will write to that filename. Given no args, returns a DEFAULT_OUT_TYPE
string
# File lib/rubabel/molecule.rb, line 583
# A Symbol argument is treated as an output type and forwarded to
# #write_string; anything else is treated as a filename and forwarded to
# #write_file.  +out_options+ is passed along unchanged.
def write(filename_or_type=:can, out_options={})
  return write_string(filename_or_type, out_options) if filename_or_type.is_a?(Symbol)
  write_file(filename_or_type, out_options)
end
writes to the file based on the extension given (must be recognized by OpenBabel
). If png is the extension or format, the png is generated from an svg.
# File lib/rubabel/molecule.rb, line 668
# Determines the output type with Rubabel.filetype(filename), renders the
# molecule with #write_string, and writes the result to +filename+ with
# File.write.
def write_file(filename, out_options={})
  format = Rubabel.filetype(filename)
  contents = write_string(format, out_options)
  File.write(filename, contents)
end
out_options include any of those defined
# File lib/rubabel/molecule.rb, line 656
# Renders the molecule as a string of the given +type+.  The work is routed
# through #png_transformer, which yields the effective format name and
# options; the block configures an OBConversion (the one supplied in
# out_options[:obconv], or a new one) and writes the OBMol with it.
def write_string(type=DEFAULT_OUT_TYPE, out_options={})
  png_transformer(type.to_s, out_options) do |format_name, format_opts|
    converter = out_options[:obconv] || OpenBabel::OBConversion.new
    converter.set_out_format(format_name)
    converter.add_opts!(:out, format_opts)
    converter.write_string(@ob)
  end
end