class StorageVisualizer

Attributes

available[RW]
available_gb[RW]
capacity[RW]

disk Bytes

capacity_gb[RW]

disk GB for display

dir_tree[RW]

this is the root DirNode object

diskhash[RW]
dupe_counter[RW]
target_dir[RW]

other

target_volume[RW]
threshold_pct[RW]
tree[RW]
tree_formatted[RW]
used[RW]
used_gb[RW]

Public Class Methods

install() click to toggle source
# File lib/storage_visualizer.rb, line 66
def self.install
  # This function installs a copy & symlink into /usr/local/bin, so the utility can simply be run by typing `storage_visualizer`
  # To install for command line use type:
  # git clone https://github.com/teecay/StorageVisualizer.git && ./StorageVisualizer/storage_visualizer.rb --install
  # To install for gem usage type:
  # gem install storage_visualizer


  script_src_path = File.expand_path(__FILE__) # File.expand_path('./StorageVisualizer/lib/storage_visualizer.rb')
  script_dest_path = '/usr/local/bin/storage_visualizer.rb'
  symlink_path = '/usr/local/bin/storage_visualizer'


  if (!File.exist?(script_src_path))
    raise "Error: file does not exist: #{script_src_path}"
  end


  if (File.exist?(script_dest_path))
    puts "Removing old installed script"
    File.delete(script_dest_path)
  end


  if (File.exist?(symlink_path))
    puts "Removing old symlink"
    File.delete(symlink_path)
  end

  cp_cmd = "cp -f #{script_src_path} #{script_dest_path}"
  puts "Copying script into place: #{cp_cmd}"
  `#{cp_cmd}`

  ln_cmd = "ln -s #{script_dest_path} #{symlink_path}"
  puts "Installing: #{ln_cmd}"
  `#{ln_cmd}`

  chmod_cmd = "chmod ugo+x #{symlink_path}"
  puts "Setting permissions: #{chmod_cmd}"
  `#{chmod_cmd}`

  puts "Installation is complete, run `storage_visualizer -h` for help"
end
new(target_dir_passed = nil) click to toggle source

Constructor

# File lib/storage_visualizer.rb, line 143
def initialize(target_dir_passed = nil)

  if (target_dir_passed != nil)
    expanded = File.expand_path(target_dir_passed)
    # puts "Target dir: #{expanded}"
    if (Dir.exist?(expanded))
      self.target_dir = expanded
    else
      raise "Target directory does not exist: #{expanded}"
    end
  else
    # no target passed, use the user's home dir
    self.target_dir = File.expand_path('~')
  end
  
  # how much space is considered worthy of noting on the chart
  self.threshold_pct = 0.05
  self.diskhash = {}
  self.tree = []
  self.tree_formatted = ''
  self.dupe_counter = 0
end
print_usage() click to toggle source

Static

Public Instance Methods

analyze_dirs(dir_to_analyze, parent) click to toggle source

Crawl the dirs recursively, beginning with the target dir

# File lib/storage_visualizer.rb, line 393
def analyze_dirs(dir_to_analyze, parent)


  # bootstrap case
  # don't create an entry for the root because there's nothing to link to yet, scan the subdirs
  if (dir_to_analyze == self.target_volume)
    # puts "Dir to analyze is the target volume"
    # run on all child dirs, not this dir
    Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
      # puts "\tentry: >#{file}<"
      full_path = File.join(dir_to_analyze, name)
      if (Dir.exist?(full_path) && !File.symlink?(full_path))
        # puts "Contender: >#{full_path}<"
        analyze_dirs(full_path, self.dir_tree)
      end
    end
    return
  end

  # use "P" to help prevent following any symlinks
  cmd = "du -sxkP \"#{dir_to_analyze}\""
  puts "\trunning #{cmd}"
  output = `#{cmd}`.strip().split("\t")
  # puts "Du output:"
  # pp output
  size = output[0].to_i 
  size_gb = "#{'%.0f' % (size.to_f / 1024 / 1024)}"
  # puts "Size: #{size}\nCapacity: #{self.diskhash['/']['capacity']}"

  # Occupancy as a fraction of total space
  # occupancy = (size.to_f / self.capacity.to_f)

  # Occupancy as a fraction of USED space
  occupancy = (size.to_f / self.used.to_f)

  occupancy_pct = "#{'%.0f' % (occupancy * 100)}"
  capacity_gb = "#{'%.0f' % (self.capacity.to_f / 1024 / 1024)}"
  
  # if this dir contains more than 5% of disk space, add it to the tree

  if (dir_to_analyze == self.target_dir)
    # puts "Dir to analyze is the target dir, space used outside this dir.."
    # account for space used outside of target dir
    other_space = self.used - size
    other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
    parent.children.push(DirNode.new(parent, self.target_volume, 'Other Space', other_space_gb))
  end
  
  
  if (occupancy > self.threshold_pct)
    # puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
    puts "Dir contains more than 5% of used disk space: #{dir_to_analyze} \n\tsize:\t\t#{size_gb} / \n\toccupancy:\t#{self.used_gb} = #{occupancy_pct}% of used space"

    # puts "Dir to analyze (#{dir_to_analyze}) is not the target dir (#{self.target_dir})"
    dirs = dir_to_analyze.split('/')
    
    short_dir = dirs.pop().gsub("'","\\\\'")
    full_parent = dirs.join('/')
    if (dir_to_analyze == self.target_dir || full_parent == self.target_volume)
      # puts "Either this dir is the target dir, or the parent is the target volume, make parent the full target volume"
      short_parent = self.target_volume.gsub("'","\\\\'")
    else
      # puts "Neither this dir or parent is the target dir, making parent short"
      short_parent = dirs.pop().gsub("'","\\\\'")
    end
    

    this_node = DirNode.new(parent, dir_to_analyze, short_dir, size_gb)
    parent.children.push(this_node)

    # run on all child dirs
    Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
      full_path = File.join(dir_to_analyze, name)
      # don't follow any symlinks
      if (Dir.exist?(full_path) && !File.symlink?(full_path))
        # puts "Contender: >#{full_path}<"
        analyze_dirs(full_path, this_node)
      end
    end
    
  end # occupancy > threshold
    
end
format_data_for_the_chart() click to toggle source
# File lib/storage_visualizer.rb, line 168
def format_data_for_the_chart

  # Build the list of nodes
  nodes = []
  nodes.push(self.dir_tree)
  comparison_list = []
  while true
    if (nodes.length == 0)
      break
    end
    node = nodes.shift
    comparison_list.push(node)
    nodes.concat(node.children)
  end
  

  # format the data for the chart
  working_string = "[\n"
  comparison_list.each_with_index do |entry, index|
    if (entry.parent == nil)
      next
    end
    if(index == comparison_list.length - 1)
      # this is the next to last element, it gets no comma
      working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ]\n"
    else
      # mind the comma
      working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ],\n"
    end
  end
  working_string << "]\n"
  self.tree_formatted = working_string
  
end
get_basic_disk_info() click to toggle source
# File lib/storage_visualizer.rb, line 294
def get_basic_disk_info
  # df -l gets info about locally-mounted filesystems
  output = `df -k`
  
  # OSX:
  #   Filesystem                          1024-blocks  Used      Available   Capacity   iused     ifree       %iused  Mounted on
  #   /dev/disk1                          975912960    349150592 626506368   36%        87351646  156626592   36%     /
  #   localhost:/QwnJE6UBvlR1EvqouX6gMM   975912960    975912960         0   100%        0         0          100%    /Volumes/MobileBackups
  
  # CentOS:
  #   Filesystem     1K-blocks    Used Available Use% Mounted on
  #   /dev/xvda1      82436764 3447996  78888520   5% /
  #   devtmpfs        15434608      56  15434552   1% /dev
  #   tmpfs           15443804       0  15443804   0% /dev/shm
  
  # Ubuntu:
  #   Filesystem     1K-blocks   Used Available Use% Mounted on
  #   /dev/xvda1      30832636 797568  28676532   3% /
  #   none                   4      0         4   0% /sys/fs/cgroup
  #   udev             3835900     12   3835888   1% /dev
  #   tmpfs             769376    188    769188   1% /run
  #   none                5120      0      5120   0% /run/lock
  #   none             3846876      0   3846876   0% /run/shm
  #   none              102400      0    102400   0% /run/user
  #   /dev/xvdb       30824956  45124  29207352   1% /mnt

  # Populate disk info into a hash of hashes
  # {"/"=>
  #   {"capacity"=>498876809216, "used"=>434777001984, "available"=>63837663232},
  #  "/Volumes/MobileBackups"=>
  #   {"capacity"=>498876809216, "used"=>498876809216, "available"=>0}
  # }

  # get each mount's capacity & utilization
  output.lines.each_with_index do |line, index|
    if (index == 0)
      # skip the header line
      next
    end
    cols = line.split
    # ["Filesystem", "1024-blocks", "Used", "Available", "Capacity", "iused", "ifree", "%iused", "Mounted", "on"]
    # line: ["/dev/disk1", "974368768", "849157528", "124699240", "88%", "106208689", "15587405", "87%", "/"]
    
    if cols.length == 9
      # OSX
      self.diskhash[cols[8]] = {
        'capacity' => (cols[1].to_i ).to_i,
        'used' => (cols[2].to_i ).to_i,
        'available' => (cols[3].to_i ).to_i
      }
    elsif cols.length == 6
      # Ubuntu & CentOS
      self.diskhash[cols[5]] = {
        'capacity' => (cols[1].to_i ).to_i,
        'used' => (cols[2].to_i ).to_i,
        'available' => (cols[3].to_i ).to_i
      }
    else
      raise "Reported disk utilization not understood"
    end
  end

  # puts "Disk mount info:"
  # pp diskhash


  # find the (self.)target_volume
  # look through diskhash keys, to find the one that most matches target_dir
  val_of_min = 1000
  # puts "Determining which volume contains the target directory.."
  self.diskhash.keys.each do |volume|
    result = self.target_dir.gsub(volume, '')
    diskhash['match_amt'] = result.length
    # puts "Considering:\t#{volume}, \t closeness: #{result.length}, \t (#{result})"
    if (result.length < val_of_min)
      # puts "Candidate: #{volume}"
      val_of_min = result.length
      self.target_volume = volume
    end
  end    
  
  puts "Target volume is #{self.target_volume}"
  

  self.capacity   = self.diskhash[self.target_volume]['capacity']
  self.used       = self.diskhash[self.target_volume]['used']
  self.available  = self.diskhash[self.target_volume]['available']

  self.capacity_gb  = "#{'%.0f' % (self.capacity.to_i / 1024 / 1024)}"
  self.used_gb      = "#{'%.0f' % (self.used.to_i / 1024 / 1024)}"
  self.available_gb = "#{'%.0f' % (self.available.to_i / 1024 / 1024)}"

  self.dir_tree = DirNode.new(nil, self.target_volume, self.target_volume, self.capacity)
  self.dir_tree.children.push(DirNode.new(self.dir_tree, 'Free Space', 'Free Space', self.available_gb))

end
run() click to toggle source
# File lib/storage_visualizer.rb, line 519
def run
  self.get_basic_disk_info
  self.analyze_dirs(self.target_dir, self.dir_tree)
  self.traverse_tree_and_remove_duplicates
  self.format_data_for_the_chart
  self.write_storage_report
  
end
traverse_tree_and_remove_duplicates() click to toggle source
# File lib/storage_visualizer.rb, line 479
def traverse_tree_and_remove_duplicates
  puts "\nHandling duplicate entries.."
  nodes = []
  nodes.push(self.dir_tree)
  comparison_list = []
  while true
    if (nodes.length == 0)
      break
    end
    
    node = nodes.shift
    comparison_list.push(node)
    # pp node
    if node.parent == nil
      # puts "\tparent: no parent \n\tdir:    #{node.dir_name} \n\tshort:  #{node.dir_short} \n\tsize:   #{node.size_gb}"
    else 
      # puts "\tparent: #{node.parent.dir_short.to_s} \n\tdir:    #{node.dir_name} \n\tshort:  #{node.dir_short} \n\tsize:   #{node.size_gb}"
    end
    nodes.concat(node.children)
  end
  # puts "Done building node list"
  
  
  
  for i in 0..comparison_list.length do
    for j in 0..comparison_list.length do
      if (comparison_list[i] != nil && comparison_list[j] != nil)
        if (i != j && comparison_list[i].dir_short == comparison_list[j].dir_short)
          puts "\t#{comparison_list[i].dir_short} is the same as #{comparison_list[j].dir_short}, changing to #{comparison_list[j].dir_short}*"
          comparison_list[j].dir_short = "#{comparison_list[j].dir_short}*"
        end
      end
    end
  end
  puts "Duplicate handling complete"
  
end
write_storage_report() click to toggle source
# File lib/storage_visualizer.rb, line 205
def write_storage_report

  the_html = %q|<html>
  <body>
  <script type="text/javascript"
        src="https://www.google.com/jsapi?autoload={'modules':[{'name':'visualization','version':'1.1','packages':['sankey']}]}">
  </script>
  
  <style>
    td
      {
        font-family:sans-serif;
        font-size:8pt;
      }

    .bigger
        {
          font-family:sans-serif;
          font-size:10pt;
          font-weight:bold
        }
  
  </style>

  <div class="table">
    <div class="bigger">Storage Report</div>
    <table>
      <tr>
        <td style="text-align:right">Disk Capacity:</td><td>| + self.capacity_gb + %q| GB</td>
      </tr>
      <tr>
        <td style="text-align:right">Disk Used:</td><td>| + self.used_gb + %q| GB</td>
      </tr>
      <tr>
        <td style="text-align:right">Free Space:</td><td>| + self.available_gb + %q| GB</td>
      </tr>
    </table>
        
  </div>


  <div id="sankey_multiple" style="width: 900px; height: 300px;"></div>

  <script type="text/javascript">

  google.setOnLoadCallback(drawChart);
     function drawChart() {
      var data = new google.visualization.DataTable();
      data.addColumn('string', 'From');
      data.addColumn('string', 'To');
      data.addColumn('number', 'Size (GB)');
      data.addRows( | + self.tree_formatted + %q|);

      // Set chart options
      var options = {
      
            width: 1000,
            sankey: {
              iterations: 32,
              node: { label: { 
                        fontName: 'Arial',
                        fontSize: 10,
                        color: '#871b47',
                        bold: false,
                        italic: true } } }, 
          };
          
          
          
      // Instantiate and draw our chart, passing in some options.
      var chart = new google.visualization.Sankey(document.getElementById('sankey_multiple'));
      chart.draw(data, options);
     }
     
     
  </script>
  </body>
  </html>|
  
  
  filename = DateTime.now.strftime("./StorageReport_%Y_%m_%d-%H_%M_%S.html")
  puts "Writing html file #{filename}"
  f = File.open(filename, 'w+')
  f.write(the_html)
  f.close
  
end