class CMSScanner::Target

Target to Scan

Scope system logic

Scope system logic

Public Class Methods

new(url, opts = {}) click to toggle source

@param [ String ] url @param [ Hash ] opts @option opts [ Array<PublicSuffix::Domain, String> ] :scope

Calls superclass method
# File lib/cms_scanner/target.rb, line 17
# Builds the target and seeds its scope.
#
# The target's own host is always registered first, followed by any extra
# entries supplied through opts[:scope].
#
# @param [ String ] url
# @param [ Hash ] opts
# @option opts [ Array<PublicSuffix::Domain, String> ] :scope
def initialize(url, opts = {})
  super(url, opts)

  # Array() lets :scope be nil, a single entry or a list.
  [uri.host, *Array(opts[:scope])].each { |entry| scope << entry }
end
page_hash(page) click to toggle source

@note Comments are deleted to avoid cache generation details

@param [ Typhoeus::Response, String ] page

@return [ String ] The md5sum of the page

# File lib/cms_scanner/target/hashes.rb, line 11
# Computes a fingerprint of a page, ignoring its scripts and comments.
#
# @note Comments and script tags are dropped before hashing so that cache
#   generation details and similar volatile content do not change the hash.
#
# @param [ Typhoeus::Response, String ] page A response, or something to request
#   (redirects are followed when the request is made here)
#
# @return [ String ] The md5sum of the page
def self.page_hash(page)
  response = page.is_a?(Typhoeus::Response) ? page : NS::Browser.get(page, followlocation: true)

  document = Nokogiri::HTML(response.body)
  document.xpath('//script|//comment()').each { |volatile| volatile.remove }

  # NOTE(review): the parsed document itself (not a serialized string) is handed to the
  # digest, which relies on the document's implicit string conversion - confirm what
  # text that yields before changing this, as it would alter every computed hash.
  Digest::MD5.hexdigest(document)
end

Public Instance Methods

comments_from_page(pattern, page = nil) { |match, node| ... } click to toggle source

@param [ Regexp ] pattern @param [ Typhoeus::Response, String ] page

@return [ Array<Array<MatchData, Nokogiri::XML::Comment>> ] @yield [ MatchData, Nokogiri::XML::Comment ]

# File lib/cms_scanner/target.rb, line 72
# Looks for HTML comments whose text matches the pattern.
# Thin wrapper around #xpath_pattern_from_page using the '//comment()' xpath.
#
# @param [ Regexp ] pattern
# @param [ Typhoeus::Response, String ] page
#
# @return [ Array<Array<MatchData, Nokogiri::XML::Comment>> ]
# @yield [ MatchData, Nokogiri::XML::Comment ]
def comments_from_page(pattern, page = nil)
  # Without a caller block there is nothing to forward to.
  return xpath_pattern_from_page('//comment()', pattern, page) unless block_given?

  xpath_pattern_from_page('//comment()', pattern, page) { |match, node| yield match, node }
end
error_404_hash() click to toggle source

@note This is used to detect potential custom 404 responding with a 200 @return [ String ] The hash of a 404

# File lib/cms_scanner/target/hashes.rb, line 29
# @note This is used to detect potential custom 404 responding with a 200
#
# The value is computed from error_404_res on first use and then memoized.
#
# @return [ String ] The hash of a 404
def error_404_hash
  return @error_404_hash if @error_404_hash

  @error_404_hash = self.class.page_hash(error_404_res)
end
homepage_hash() click to toggle source

@return [ String ] The hash of the homepage

# File lib/cms_scanner/target/hashes.rb, line 23
# The value is computed from the target url on first use and then memoized.
#
# @return [ String ] The hash of the homepage
def homepage_hash
  return @homepage_hash if @homepage_hash

  @homepage_hash = self.class.page_hash(url)
end
homepage_or_404?(page) click to toggle source

@param [ Typhoeus::Response, String ] page @return [ Boolean ] Whether or not the page is the homepage or a 404 based on its md5sum

# File lib/cms_scanner/target/hashes.rb, line 35
# @param [ Typhoeus::Response, String ] page
#
# @return [ Boolean ] Whether or not the page is the homepage or a 404,
#   based on its md5sum
def homepage_or_404?(page)
  known_hashes = homepage_and_404_hashes
  candidate    = self.class.page_hash(page)

  known_hashes.include?(candidate)
end
in_scope?(url_or_uri) click to toggle source

@param [ String, Addressable::URI ] url_or_uri An absolute URL or URI

@return [ Boolean ] true if the url given is in scope

# File lib/cms_scanner/target/scope.rb, line 14
# Checks whether the host of the given URL/URI belongs to the scope.
#
# @param [ String, Addressable::URI ] url_or_uri An absolute URL or URI
#
# @return [ Boolean ] true if the url given is in scope; false otherwise,
#   including when the value cannot be processed (any StandardError)
def in_scope?(url_or_uri)
  candidate = url_or_uri.is_a?(Addressable::URI) ? url_or_uri : Addressable::URI.parse(url_or_uri.strip)

  scope.include?(candidate.host)
rescue StandardError
  false
end
in_scope_uris(res, xpath = '//@href|//@src|//@data-src') { |uri, tag| ... } click to toggle source

@param [ Typhoeus::Response ] res @param [ String ] xpath

@yield [ Addressable::URI, Nokogiri::XML::Element ] The in scope url and its associated tag

@return [ Array<Addressable::URI> ] The in scope absolute URIs detected in the response’s body

@note It is highly recommended to use the xpath parameter to focus on the uris needed, as this method can be quite

time consuming when there are a lot of uris to check
# File lib/cms_scanner/target/scope.rb, line 31
# Collects the URIs found in the response which are in scope.
#
# @param [ Typhoeus::Response ] res
# @param [ String ] xpath
#
# @yield [ Addressable::URI, Nokogiri::XML::Element ] The in scope url and its associated tag
#
# @return [ Array<Addressable::URI> ] The in scope absolute URIs detected in the response's body
#
# @note It is highly recommended to use the xpath parameter to focus on the uris needed,
#   as this method can be quite time consuming when there are a lot of uris to check
def in_scope_uris(res, xpath = '//@href|//@src|//@data-src')
  in_scope = []

  uris_from_page(res, xpath) do |candidate, tag|
    if in_scope?(candidate)
      yield candidate, tag if block_given?

      in_scope << candidate
    end
  end

  in_scope
end
interesting_findings(opts = {}) click to toggle source

@param [ Hash ] opts

@return [ Findings ]

# File lib/cms_scanner/target.rb, line 27
# Runs the interesting findings finders against this target.
# The result is memoized, so opts only matter on the first call.
#
# @param [ Hash ] opts
#
# @return [ Findings ]
def interesting_findings(opts = {})
  return @interesting_findings if @interesting_findings

  @interesting_findings = NS::Finders::InterestingFindings::Base.find(self, opts)
end
javascripts_from_page(pattern, page = nil) { |match, node| ... } click to toggle source

@param [ Regexp ] pattern @param [ Typhoeus::Response, String ] page

@return [ Array<Array<MatchData, Nokogiri::XML::Element>> ] @yield [ MatchData, Nokogiri::XML::Element ]

# File lib/cms_scanner/target.rb, line 83
# Looks for script tags whose text matches the pattern.
# Thin wrapper around #xpath_pattern_from_page using the '//script' xpath.
#
# @param [ Regexp ] pattern
# @param [ Typhoeus::Response, String ] page
#
# @return [ Array<Array<MatchData, Nokogiri::XML::Element>> ]
# @yield [ MatchData, Nokogiri::XML::Element ]
def javascripts_from_page(pattern, page = nil)
  # Without a caller block there is nothing to forward to.
  return xpath_pattern_from_page('//script', pattern, page) unless block_given?

  xpath_pattern_from_page('//script', pattern, page) { |match, node| yield match, node }
end
scope() click to toggle source

@return [ Array<PublicSuffix::Domain, String> ]

# File lib/cms_scanner/target/scope.rb, line 7
# The scope of the target, created empty on first access and then reused.
#
# @return [ Array<PublicSuffix::Domain, String> ]
def scope
  return @scope if @scope

  @scope = Scope.new
end
scope_url_pattern() click to toggle source

Similar to Target#url_pattern but considering the in scope domains as well

@return [ Regexp ] The pattern related to the target url and in scope domains,

it also matches escaped /, such as in JSON JS data: http:\/\/t.com\/

rubocop:disable Metrics/AbcSize

# File lib/cms_scanner/target/scope.rb, line 50
# Similar to Target#url_pattern but considering the in scope domains as well.
# The pattern is built once and memoized in @scope_url_pattern.
#
# @return [ Regexp ] The case-insensitive pattern related to the target url and in scope
#   domains, it also matches escaped /, such as in JSON JS data: http:\/\/t.com\/
def scope_url_pattern
  return @scope_url_pattern if @scope_url_pattern

  # First alternative: the target's own host followed by its path.
  domains = [uri.host + uri.path]

  # The first entry of the relevant scope list is skipped ([1..-1]).
  # NOTE(review): presumably because it is the target host added at initialization - confirm.
  domains += if scope.domains.empty?
               Array(scope.invalid_domains[1..-1])
             else
               Array(scope.domains[1..-1]).map(&:to_s) + scope.invalid_domains
             end

  # Escape each entry for regexp use (minus one trailing '/'), turn an escaped '*' wildcard
  # into '.*', and let every '/' be optionally preceded by a backslash.
  domains.map! { |d| Regexp.escape(d.delete_suffix('/')).gsub('\*', '.*').gsub('/', '\\\\\?/') }

  # When the target uri has a port, accept an optional ":<digits>" right after the
  # (already escaped) target host in the first alternative.
  domains[0].gsub!(Regexp.escape(uri.host), "#{Regexp.escape(uri.host)}(?::\\d+)?") if uri.port

  # Scheme may be http or https; the slashes after it and the trailing one may be escaped.
  @scope_url_pattern = %r{https?:\\?/\\?/(?:#{domains.join('|')})\\?/?}i
end
uris_from_page(page = nil, xpath = '//@href|//@src|//@data-src') { |node_uri, parent| ... } click to toggle source

@param [ Typhoeus::Response, String ] page @param [ String ] xpath

@yield [ Addressable::URI, Nokogiri::XML::Element ] The url and its associated tag

@return [ Array<Addressable::URI> ] The absolute URIs detected in the response’s body from the HTML tags

@note It is highly recommended to use the xpath parameter to focus on the uris needed, as this method can be quite

time consuming when there are a lot of uris to check
# File lib/cms_scanner/target.rb, line 98
# Extracts the absolute URIs referenced by the nodes the xpath selects.
#
# Each value is resolved against the target uri; empty values, values that
# cannot be resolved and URIs without a host are ignored.
#
# @param [ Typhoeus::Response, String ] page A response, or something passed to #url and requested
# @param [ String ] xpath
#
# @yield [ Addressable::URI, Nokogiri::XML::Element ] The url and its associated tag,
#   only the first time a given url is encountered
#
# @return [ Array<Addressable::URI> ] The absolute URIs detected in the response's body from the HTML tags
#
# @note It is highly recommended to use the xpath parameter to focus on the uris needed,
#   as this method can be quite time consuming when there are a lot of uris to check
def uris_from_page(page = nil, xpath = '//@href|//@src|//@data-src')
  response  = page.is_a?(Typhoeus::Response) ? page : NS::Browser.get(url(page))
  collected = []

  response.html.xpath(xpath).each do |node|
    raw_value = node.text.to_s

    next if raw_value.empty?

    resolved = begin
      uri.join(raw_value.strip)
    rescue StandardError
      # Potential malformed URLs etc. are skipped by the host check below
      nil
    end

    next unless resolved&.host

    yield resolved, node.parent if block_given? && !collected.include?(resolved)

    collected << resolved
  end

  collected.uniq
end
url_pattern() click to toggle source

@return [ Regexp ] The pattern related to the target url, also matches escaped /, such as

in JSON JS data: http:\/\/t.com\/
# File lib/cms_scanner/target.rb, line 42
# The pattern is built from the target url on first use and then memoized.
#
# @return [ Regexp ] The case-insensitive pattern related to the target url, matching both
#   http and https, and also escaped /, such as in JSON JS data: http:\/\/t.com\/
def url_pattern
  return @url_pattern if @url_pattern

  source = Regexp.escape(url)
  source = source.gsub(/https?/i, 'https?') # either scheme is accepted
  source = source.gsub('/', '\\\\\?/')      # each / may be preceded by a backslash

  @url_pattern = Regexp.new(source, Regexp::IGNORECASE)
end
vulnerable?() click to toggle source

Whether or not vulnerabilities have been found. Used to set the exit code of the scanner; it should be overridden in the implementation

@return [ Boolean ]

# File lib/cms_scanner/target.rb, line 36
# Whether or not vulnerabilities have been found.
# Used to set the exit code of the scanner; this base version has no
# implementation and is meant to be overridden.
#
# @return [ Boolean ]
# @raise [ NotImplementedError ] always, in this base implementation
def vulnerable?
  raise NotImplementedError
end
xpath_pattern_from_page(xpath, pattern, page = nil) { |last_match, node| ... } click to toggle source

@param [ String ] xpath @param [ Regexp ] pattern @param [ Typhoeus::Response, String ] page

@return [ Array<Array<MatchData, Nokogiri::XML::Element>> ] @yield [ MatchData, Nokogiri::XML::Element ]

# File lib/cms_scanner/target.rb, line 52
# Matches the stripped text of every node selected by the xpath against the pattern.
#
# @param [ String ] xpath
# @param [ Regexp ] pattern
# @param [ Typhoeus::Response, String ] page A response, or something passed to #url and requested
#
# @return [ Array<Array<MatchData, Nokogiri::XML::Element>> ] One [match, node] pair per matching node
# @yield [ MatchData, Nokogiri::XML::Element ]
def xpath_pattern_from_page(xpath, pattern, page = nil)
  response = page.is_a?(Typhoeus::Response) ? page : NS::Browser.get(url(page))

  response.html.xpath(xpath).each_with_object([]) do |node, pairs|
    next unless node.text.strip =~ pattern

    # Keep hold of the MatchData produced by the test above
    match = Regexp.last_match

    yield match, node if block_given?

    pairs << [match, node]
  end
end

Protected Instance Methods

homepage_and_404_hashes() click to toggle source
# File lib/cms_scanner/target/hashes.rb, line 41
# The homepage hash followed by the 404 hash, as a frozen pair which is
# built on first use and then memoized.
#
# @return [ Array<String> ]
def homepage_and_404_hashes
  return @homepage_and_404_hashes if @homepage_and_404_hashes

  @homepage_and_404_hashes = [homepage_hash, error_404_hash].freeze
end