class String
Overloads the {String} class.
@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>
Constants
- BINARY_CACHE
- HAS_HTML_TAG_CACHE
Public Instance Methods
binary?()
click to toggle source
# File lib/arachni/ruby/string.rb, line 179
# Heuristically decides whether the string holds binary data (approach
# borrowed from YAML).
#
# The check is handed to `BINARY_CACHE.fetch`, keyed by the string itself.
#
# @return [Integer, Boolean]
#   The index of the first NUL byte when there is one; otherwise whether more
#   than 30% of the characters are ASCII characters other than printable ones,
#   tab, CR and LF.
def binary?
  BINARY_CACHE.fetch( self ) do
    index( "\x00" ) || begin
      # ASCII characters that are neither printable nor tab/CR/LF.
      suspicious = count( "\x00-\x7F", "^ -~\t\r\n" )
      suspicious.fdiv( length ) > 0.3
    end
  end
end
diff_ratio( other )
click to toggle source
Calculates the difference ratio (at a word level) between `self` and `other`.
@param [String] other
@return [Float]
`0.0` (identical strings) to `1.0` (completely different)
# File lib/arachni/ruby/string.rb, line 120
# Calculates the difference ratio (at a word level) between `self` and
# `other`: the number of distinct words found in only one of the two strings,
# divided by the number of distinct words found in either.
#
# @param [String] other
#
# @return [Float]
#   `0.0` (identical strings) to `1.0` (completely different). Two different
#   strings that contain no words at all (only boundary characters) have
#   nothing to compare at the word level and yield `0.0`.
def diff_ratio( other )
  return 0.0 if self == other
  return 1.0 if empty? || other.empty?

  s_words = words( true )
  o_words = other.words( true )

  union = (s_words | o_words).size

  # Without this guard the division below would be 0.0 / 0.0, i.e. NaN, which
  # is outside of the documented range.
  return 0.0 if union.zero?

  common = (s_words & o_words).size
  (union - common).fdiv( union )
end
escape_double_quote()
click to toggle source
# File lib/arachni/ruby/string.rb, line 57
# Backslash-escapes every double quote in the string.
#
# @return [String]
#   Copy of `self` in which each `"` has been replaced by `\"`.
def escape_double_quote
  gsub( '"' ) { '\"' }
end
has_html_tag?( tag, attributes = nil )
click to toggle source
@param [String] tag
Tag name to look for, in lower case.
@param [String,Regexp] attributes
Content to look for in attributes, in lower case.
# File lib/arachni/ruby/string.rb, line 35
# Checks whether the string contains an opening HTML tag named `tag`,
# optionally followed somewhere by the given attribute content. Matching is
# case-insensitive and spans newlines.
#
# The check is handed to `HAS_HTML_TAG_CACHE.fetch`, keyed by the string and
# both arguments.
#
# @param [String] tag
#   Tag name to look for, in lower case. It is interpolated into the pattern
#   as-is (not regexp-escaped).
# @param [String,Regexp] attributes
#   Content to look for in attributes, in lower case.
#
# @return [Integer, nil]
#   Result of the `=~` match: offset of the first matching tag, or `nil`.
def has_html_tag?( tag, attributes = nil )
  HAS_HTML_TAG_CACHE.fetch( [self, tag, attributes] ) do
    # Allow arbitrary content between the tag name and the attribute text.
    attribute_pattern = attributes ? ".*#{attributes}" : attributes

    self =~ /<\s*#{tag}#{attribute_pattern}.*?>/mi
  end
end
longest_word()
click to toggle source
@return [String]
Longest word.
# File lib/arachni/ruby/string.rb, line 153
# Finds the longest of the string's words (boundary characters excluded).
#
# @return [String, nil]
#   Longest word, `nil` when the string has no words.
def longest_word
  by_size = words( true ).sort_by( &:size )
  by_size.last
end
persistent_hash()
click to toggle source
@return [Integer]
An integer with the property that if `str1 == str2` then `str1.persistent_hash == str2.persistent_hash`. It serves the same purpose as Ruby's `#hash` method, but does not use a random seed per Ruby process -- making it suitable for use in distributed systems.
# File lib/arachni/ruby/string.rb, line 165
# Computes a hash of the string that is stable across Ruby processes.
#
# @return [Integer]
#   CRC32 checksum of `self`, as computed by `Zlib.crc32`.
def persistent_hash
  Zlib.crc32( self )
end
rdiff( other )
click to toggle source
Gets the reverse diff between `self` and `other` on a word level.
str = <<END
This is the first test.
Not really sure what else to put here...
END

str2 = <<END
This is the second test.
Not really sure what else to put here...
Boo-Yah!
END

str.rdiff( str2 )
# => "This is the test.\nNot really sure what else to put here...\n"
@param [String] other
@return [String]
# File lib/arachni/ruby/string.rb, line 104
# Gets the reverse diff between `self` and `other` on a word level, i.e. the
# parts of `self` that also appear in `other`.
#
# @param [String] other
#
# @return [String]
#   `self` when both strings are equal, otherwise the tokens of `self` that
#   are also tokens of `other`, joined back together.
def rdiff( other )
  return self if self == other

  own_tokens = words

  # Tokens that only exist in this string...
  exclusive = own_tokens - other.words

  # ...are dropped, leaving what the two strings have in common.
  (own_tokens - exclusive).join
end
recode()
click to toggle source
# File lib/arachni/ruby/string.rb, line 175
# Non-destructive counterpart of {#recode!}: works on a duplicate and leaves
# `self` untouched.
#
# @return [String]
#   Whatever `#recode!` returns for the duplicate.
def recode
  duplicate = dup
  duplicate.recode!
end
recode!()
click to toggle source
# File lib/arachni/ruby/string.rb, line 169
# Re-tags the string as UTF-8 in place and replaces invalid or undefined
# sequences.
#
# @return [String]
#   `self`
def recode!
  utf8 = Encoding::UTF_8

  # Treat the raw bytes as UTF-8, then substitute whatever is not valid.
  force_encoding( utf8 )
  encode!( utf8, invalid: :replace, undef: :replace )

  self
end
scan_in_groups( regexp )
click to toggle source
@param [Regexp] regexp
Regular expression with named captures.
@return [Hash]
Grouped matches.
# File lib/arachni/ruby/string.rb, line 24
# Scans the string with `regexp` and returns the named captures of the first
# match.
#
# @param [Regexp] regexp
#   Regular expression with named captures.
#
# @return [Hash]
#   Grouped matches (capture name => matched text). Groups that did not take
#   part in the match, or matched an empty string, are left out. Empty when
#   there is no match.
#
# @raise [ArgumentError]
#   When `regexp` has no named captures.
def scan_in_groups( regexp )
  raise ArgumentError, 'Regexp does not contain any names.' if regexp.names.empty?

  matches = scan( regexp ).first
  return {} unless matches

  # Optional groups that did not participate are reported as nil by #scan,
  # so check for nil before asking for emptiness.
  Hash[regexp.names.zip( matches )].reject { |_, v| v.nil? || v.empty? }
end
shortest_word()
click to toggle source
@return [String]
Shortest word.
# File lib/arachni/ruby/string.rb, line 147
# Finds the shortest of the string's words (boundary characters excluded).
#
# @return [String, nil]
#   Shortest word, `nil` when the string has no words.
def shortest_word
  by_size = words( true ).sort_by( &:size )
  by_size.first
end
sub_in_groups( regexp, substitutions )
click to toggle source
@param [Regexp] regexp
Regular expression with named captures.
@param [Hash] substitutions
Hash (with capture names as keys) with which to replace the `regexp` matches.
@return [String]
Updated copy of self.
# File lib/arachni/ruby/string.rb, line 53
# Non-destructive counterpart of {#sub_in_groups!}.
#
# @param [Regexp] regexp
#   Regular expression with named captures.
# @param [Hash] substitutions
#   Hash (with capture names as keys) with which to replace the `regexp`
#   matches.
#
# @return [String]
#   Updated copy of self; an unmodified copy when `regexp` does not match.
def sub_in_groups( regexp, substitutions )
  copy = dup

  # The bang variant returns nil when there is no match, so return the copy
  # explicitly instead of relying on its return value.
  copy.sub_in_groups!( regexp, substitutions )
  copy
end
sub_in_groups!( regexp, updates )
click to toggle source
@param [Regexp] regexp
Regular expression with named captures.
@param [Hash] updates
Hash (with capture names as keys) with which to replace the `regexp` matches.
@return [String]
Updated self.
# File lib/arachni/ruby/string.rb, line 69
# Replaces, in place, the text matched by the named captures of `regexp` with
# the corresponding values of `updates`.
#
# @param [Regexp] regexp
#   Regular expression with named captures.
# @param [Hash] updates
#   Hash (with capture names as keys) with which to replace the `regexp`
#   matches. Entries for groups that did not take part in the match are
#   ignored.
#
# @return [String, nil]
#   Updated self, `nil` when `regexp` does not match.
#
# @raise [IndexError]
#   When a key of `updates` is not a capture name of `regexp`.
def sub_in_groups!( regexp, updates )
  return unless (match = regexp.match( self ))

  # Optional groups that did not participate have [nil, nil] offsets; using
  # those would either break the sort below or overwrite the whole string.
  spans = updates.keys.map { |k| [k, match.offset( k )] }
  spans.reject! { |_, span| span.first.nil? }

  # Go from right to left so that the offsets of the groups which are yet to
  # be replaced stay valid.
  spans.sort_by { |_, span| span }.reverse_each do |k, span|
    self[span.first...span.last] = updates[k]
  end

  self
end
words( strict = false )
click to toggle source
Returns the words in `self`.
@param [Bool] strict
Include *only* words, no boundary characters (like spaces, etc.).
@return [Array<String>]
# File lib/arachni/ruby/string.rb, line 139
# Splits the string at word boundaries.
#
# @param [Bool] strict
#   Include *only* words, no boundary characters (like spaces, etc.).
#
# @return [Array<String>]
#   All pieces between word boundaries; with `strict`, only the pieces that
#   contain at least one word character.
def words( strict = false )
  tokens = split( /\b/ )
  return tokens unless strict

  tokens.select { |token| token =~ /\w/ }
end