class Twb::Analysis::CalculatedFieldsAnalyzer

Attributes

calculatedFieldsCount[R]
metrics[R]
referencedFieldsCount[R]
ttdocdir[RW]

Public Class Methods

new(**args) click to toggle source

DOTHEADER

# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 88
# Builds a CalculatedFieldsAnalyzer: stores the caller's options, runs the
# shared +init+ step, opens the three CSV outputs and zeroes the counters.
#
# Options read from +args+:
#   :recordDir - @recordDir becomes true only when this is the literal +true+
#   :ttdocdir  - stored unchanged in @ttdocdir
#   :csvMode   - +:add+ sets @csvMode to 'a'; anything else sets it to 'w'
def initialize(**args)
  emit "initialize CalculatedFieldsAnalyzer args #{args}"
  @args      = args
  # **args always produces a Hash, so it can be indexed without a nil guard.
  @recordDir = @args[:recordDir] == true
  @ttdocdir  = @args[:ttdocdir]
  @csvAdd    = @args[:csvMode] == :add
  @csvMode   = @csvAdd ? 'a' : 'w'
  init
  @funcdoc = {
    class:       self.class,
    blurb:       'Analyze Calculated Fields',
    description: 'Calculated fields can be complex, this tool provides robust coverage.'
  }
  #-- CSV records collectors
  @csvFormulaFields = Set.new
  @csvFormulaLines  = Set.new
  #-- Counters setup --
  @twbCount              = 0
  @dataSourcesCount      = 0
  @calculatedFieldsCount = 0
  @referencedFieldsCount = 0
  #-- ids of fields referenced by any formula, accumulated across data sources
  @referencedFields = SortedSet.new
  #-- CSV outputs: file name, description, header
  @csvCF   = initCSV(@@calcFieldsCSVFileName, 'Calculated fields and their formulas.',                      @@calcFieldsCSVFileHeader)
  @csvCFLs = initCSV(@@calcLinesCSVFileName,  "Calculated fields and their formulas' individual lines.",    @@calcLinesCSVFileHeader)
  @csvFF   = initCSV(@@formFieldsCSVFileName, 'Calculated fields and the fields their formulas reference.', @@formFieldsCSVFileHeader)
  # TODO migrate addition of 'Workbook Dir' to CSV header to TabTool
  #--
  @localEmit  = false
  @imageFiles = []
  #--
  @doGraph = config(:dograph)
end

Public Instance Methods

loadMetrics() click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 154
# Snapshots the running counters into @metrics, keyed by display label.
#
# Returns the newly built Hash (the same object stored in @metrics).
def loadMetrics
  summary = {}
  summary['# of Data Sources']      = @dataSourcesCount
  summary['# of Calculated Fields'] = @calculatedFieldsCount
  summary['# of Referenced Fields'] = @referencedFieldsCount
  @metrics = summary
end
processTWB(workbook) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 120
# Analyzes one workbook: resets the per-workbook graph state, runs
# processDataSource on every data source except the Parameters one, then
# writes the GML file and calls +finis+.
#
# workbook - a Twb::Workbook, or a String which is handed to
#            Twb::Workbook.new
#
# Raises ArgumentError when +workbook+ is neither of those.
# Returns whatever +finis+ returns.
def processTWB(workbook)
  @twb = workbook.is_a?(String) ? Twb::Workbook.new(workbook) : workbook
  # `throw` belongs to catch/throw control flow and would surface here as an
  # UncaughtThrowError; raise a descriptive ArgumentError (its superclass)
  # so the failure says what was wrong.
  unless @twb.is_a?(Twb::Workbook)
    raise ArgumentError, "expected a Twb::Workbook or a workbook file name, got #{workbook.class}"
  end
  emit  "- Workbook: #{workbook}"
  emit  "   version: #{@twb.version}"
  @twbDir   = @twb.dir #File.dirname(File.expand_path(workbook))
  @modTime  = @twb.modtime
  #-- per-workbook state, rebuilt on every call
  @edges         = Set.new
  @twbRootFields = Set.new
  @twbFields     = Hash.new { |h, k| h[k] = [] }
  @nodes         = Set.new
  #-- processing
  @twb.datasources.each do |ds|
    # counted before the Parameters check, so Parameters is included in the total
    @dataSourcesCount += 1
    next if ds.Parameters?  # don't process the Parameters data source - Parameters' fields aren't Calculated fields for our purposes
    processDataSource ds
  end
  # mapTwb
  emitGml
  @twbCount += 1
  finis
end

Private Instance Methods

cypher(twbName) click to toggle source

# Writes the workbook's nodes and edges to a DOT file obtained from initDot,
# then renders that file as pdf, png and svg via renderDot.
#
# Returns the result of the last renderDot call.
def mapTwb
  twb         = @twb.name
  dotStuff    = initDot twb
  dotFile     = dotStuff[:file]
  dotFileName = dotStuff[:name]
  dotFile.puts "\n //  subgraph cluster_1 {"
  dotFile.puts "   //    color= grey;"
  dotFile.puts ""
  # this two step process coalesces the edges into a unique set, avoiding duplicating the dot
  # file entries, and can be shrunk when graph edges expose the bits necessary for management by Set
  edgesAsStrings = SortedSet.new
  emit "\n========================\nLoading Edges\n========================\n  From DC?  Referenced?  Edge \n  %s  %s  %s" % ['--------', '-----------', '-'*45]
  @edges.each do |e|
    # don't want to emit edge which is from a Data Connection to a
    # Calculated Field which is also referenced by another calculated field
    isFromDC   = e.from.type == :TwbDataConnection
    isRefField = @referencedFields.include?(e.to.id)
    edgesAsStrings.add(e.dot) unless isFromDC && isRefField
  end
  emit "------------------------\n "
  edgesAsStrings.each do |es|
    dotFile.puts "        #{es}"
  end
  emit "========================\n "
  dotFile.puts ""
  dotFile.puts "   // }"
  dotFile.puts "\n\n  //  4 NODES --------------------------------------------------------------------"
  @nodes.each do |n|
    dotFile.puts n.dotLabel
  end
  dotFile.puts "\n\n  //  5--------------------------------------------------------------------"
  emitTypes(dotFile)
  closeDot(dotFile, twb)
  emit "Rendering DOT file - #{twb}"
  renderDot(twb, dotFileName, 'pdf')
  renderDot(twb, dotFileName, 'png')
  renderDot(twb, dotFileName, 'svg')
  # emitEdges
end

# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 356
# Hands the collected nodes and edges to a Twb::Util::Cypher renderer, but
# only when @doGraph is truthy.
#
# twbName - prefix of the renderer's fileName ("<twbName>.calcFields")
#
# Returns the renderer's +render+ result, or nil when graphing is off.
def cypher(twbName)
  return unless @doGraph

  renderer = Twb::Util::Cypher.new
  renderer.fileName = "#{twbName}.calcFields"
  renderer.nodes    = @nodes
  renderer.edges    = @edges
  renderer.render
end
cypherPy(twbName) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 366
# Hands the collected nodes and edges to a Twb::Util::CypherPython renderer,
# but only when @doGraph is truthy.
#
# twbName - prefix of the renderer's fileName ("<twbName>.calcFields")
#
# Returns the renderer's +render+ result, or nil when graphing is off.
def cypherPy(twbName)
  return unless @doGraph

  renderer = Twb::Util::CypherPython.new
  renderer.fileName = "#{twbName}.calcFields"
  renderer.nodes    = @nodes
  renderer.edges    = @edges
  renderer.render
end
emitCalcfield(calcField) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 305
# Emits a four-line diagnostic summary of a calculated field: caption,
# technical name, UI name and flattened formula.
#
# calcField - object responding to caption, name, uiname and calculation
#             (whose result responds to formulaFlat)
def emitCalcfield(calcField)
  emit "\t FIELD    cap :: #{calcField.caption} "
  emit "\t         tname:: #{calcField.name}"
  emit "\t        uiname:: #{calcField.uiname}"
  # The formula lives on the field's calculation; there is no local
  # `calculation` in this method, so it must be reached through calcField.
  emit "\t       formula:: #{calcField.calculation.formulaFlat}"
end
emitEdges() click to toggle source

# No-op placeholder: the body consists solely of commented-out code, so
# calling this does nothing and returns nil. The disabled code wrote Cypher
# "create" statements for every edge and its two end nodes to "<twb>.cypher".
#
# twb - unused while the body stays commented out
def graphEdges twb

# graphFile = File.new(twb + '.cypher', 'w')
# # graphFile.puts "OKEY DOKE, graphing away"
# cypherCode = Set.new
# @edges.each do |edge|
#   cypherCode.add edge.from.cypherCreate
#   cypherCode.add edge.to.cypherCreate
#   cypherCode.add edge.cypherCreate
# end
# cypherCode.each do |cc|
#   graphFile.puts cc
# end
# graphFile.puts "\nreturn *"
# graphFile.close unless graphFile.nil?
# @imageFiles << File.basename(graphFile)

end

# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 393
# Emits a two-column listing (type, node) of every edge's endpoints: a
# header, a rule, then the "from" and "to" node of each edge followed by a
# separator line.
#
# Returns @edges.
def emitEdges
  row = "  %-15s    %s"
  emit format(row, 'type', 'Edge')
  emit format(row, '-' * 15, '-' * 35)
  @edges.each do |edge|
    [edge.from, edge.to].each do |endpoint|
      emit format(row, endpoint.type, endpoint)
    end
    emit "\n "
  end
end
emitGml() click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 166
# Renders the collected nodes and edges through Twb::Util::GML and registers
# the result with addDocFile.
#
# The output name is "<docDir><workbook name>.calculatedFields", where docDir
# is './' when @ttdocdir is nil, otherwise @ttdocdir with exactly one
# trailing '/'.
#
# Returns whatever addDocFile returns.
def emitGml
  gml = Twb::Util::GML.new
  docDir = @ttdocdir.nil? ? './' : "#{@ttdocdir}/".sub(%r{/+$}, '/')
  # docDir already ends in a single '/', so no further separator is added
  # (adding one produced paths such as ".//name.calculatedFields").
  gml.fileName = "#{docDir}#{@twb.name}.calculatedFields"
  gml.nodes = @nodes
  gml.edges = @edges
  gmlFile = gml.render # resets gml.fileName in preparing it for output
  addDocFile gmlFile, gml.fileName, "Graph Modeling Language file of Workbook's Calculated Fields"
end
emitTypes(dotFile) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 403
# Groups the endpoints of every edge by node type (via loadNodeType) and asks
# rankSame to write a rank group for each type other than 'CalculatedField'.
#
# dotFile - IO-like object receiving the DOT text
#
# Returns the type => nodes Hash that was built.
def emitTypes(dotFile)
  dotFile.puts "\n\n  //  2--------------------------------------------------------------------"
  nodesByType = {}
  @edges.each do |edge|
    emit  "   EDGE  :: #{edge}"
    [edge.from, edge.to].each { |endpoint| loadNodeType(nodesByType, endpoint) }
  end
  nodesByType.each do |type, members|
    # eql? against a String: only a String type equal to 'CalculatedField' is skipped
    next if type.eql?('CalculatedField')

    rankSame(dotFile, type, members)
  end
  # labelTypes dotFile, edges
end
labelTypes(dotFile) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 452
# Writes a DOT "cluster_0" subgraph listing each distinct node type found on
# the edges' endpoints, as a double-quoted label. Writes nothing when there
# are no edges.
#
# dotFile - IO-like object receiving the DOT text
#
# Returns nil.
def labelTypes(dotFile)
  quotedTypes = @edges.flat_map { |edge| [edge.from.type, edge.to.type] }
                      .map { |type| "\"#{type}\"" }
                      .uniq
  return if quotedTypes.empty?

  dotFile.puts "\n  //  3--------------------------------------------------------------------"
  dotFile.puts '   subgraph cluster_0 {'
  dotFile.puts '     color=white;'
  dotFile.puts '     node [shape="box3d"  style="filled" ];'
  quotedTypes.each { |label| dotFile.puts "    #{label}" }
  dotFile.puts '   }'
end
loadNodeType(set, node) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 421
# Files +node+ under its type in +set+, creating the per-type Set on first use.
#
# set  - Hash mapping a node type to a Set of nodes (modified in place)
# node - object responding to +type+
#
# Returns the Set the node was added to.
def loadNodeType(set, node)
  kind = node.type
  set.fetch(kind) { set[kind] = Set.new }.add(node)
end
processDataSource(ds) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 177
# Processes one data source: adds graph nodes and edges for the data source,
# each of its calculated fields and every field their formulas reference,
# builds the CSV records for them, and appends those records to @csvCF,
# @csvCFLs and @csvFF.
#
# ds - data source object; this method calls uiname, id, caption, name and
#      calculatedFields on it
#
# Side effects: increments @calculatedFieldsCount and @referencedFieldsCount,
# adds to @nodes, @edges, @twbFields, @twbRootFields and @csvFormulaFields.
#
# Returns @imageFiles.
def processDataSource ds
  emit   "=======    DATA SOURCE: #{ds.uiname}    ====== "
  dsNodes               = Set.new   # NOTE(review): never used below
  dsEdges               = Set.new   # NOTE(review): never used below
  dsFields              = {}
  @twbFields[ds.uiname] = dsFields
  calculatedFields      = SortedSet.new
  fieldFormulaLines     = Array.new
  # NOTE(review): nothing in this method adds to referencedFields, so the
  # set difference and the merge near the end operate on an empty set.
  referencedFields      = SortedSet.new
  # reset on every call, unlike @csvFormulaFields (see the note at the bottom)
  @csvCalculatedFields  = Set.new

  # if @doGraph
    dataSourceNode = Twb::Util::Graphnode.new(name: ds.uiname, id: ds.id, type: ds, properties: {workbook: @twb.name})
    @nodes.add dataSourceNode
  # end
  #-- process Calculated Fields
  ds.calculatedFields.each do |calcField|
    emit "Calculated Field: #{calcField}"
    calculatedFields.add calcField.id
    dsFields[calcField.uiname] = calcField
    # if @doGraph
    calcFieldNode    = Twb::Util::Graphnode.new(name: calcField.uiname, id: calcField.id, type: calcField, properties: {:DataSource => ds.uiname})
    @nodes.add calcFieldNode
    dsFieldEdge     = Twb::Util::Graphedge.new(from: dataSourceNode, to: calcFieldNode, relationship: 'contains')
    @edges.add dsFieldEdge
    # end
    calculation     = calcField.calculation
    if calculation.has_formula
        #-- collect field formulas as single lines
        csvRec =  [
                    # bumps the running counter and uses the new value as the record's first column
                    @calculatedFieldsCount += 1,
                    @twb.name,
                    # @modTime,
                    ds.uiname,
                    ds.caption,
                    ds.name,
                    calcField.uiname,
                    calcField.caption,
                    calcField.name,
                    ds.name + '::' + calcField.name,
                    calcField.datatype,
                    calcField.role,
                    calcField.type,
                    calculation.class,     
                    calculation.scopeIsolation,
                    calculation.formulaFlat.length, 
                    calculation.formulaLinesCount, 
                    calculation.formulaFlatResolved,    
                    calculation.formulaFlat.strip,
                    calculation.comments,
                    calculation.is_lod
                  ]
        # puts csvRec.to_s
        # puts "incl? #{@csvCalculatedFields.include?(csvRec)}"
        @csvCalculatedFields << csvRec
        #-- collect individual formula lines
        flnum = 0
        emit "@@ calcField.uiname: #{calcField.uiname}"
        calculation.formulaResolvedLines.each do |fl|
          emit "@@ resolved line::   => '#{fl}'"
          fieldFormulaLines << [ @calculatedFieldsCount,        # 'Calc Field #',
                                 @twb.name,                     # 'Workbook',
                                 # @modTime,
                                 ds.uiname,                     # 'Data Source',
                                 ds.caption,                    # 'Data Source Caption',
                                 ds.name,                       # 'Data Source Name (tech)',
                                 calcField.uiname,              # 'Field Name',
                                 calcField.caption,             # 'Field Caption',
                                 calcField.name,                # 'Field Name (tech)',
                                 calcField.calculation.formulaFlatResolved, # 'Formula'
                                 flnum += 1,                    # 'Formula Line #',
                                 fl.start_with?(" ") ? "'#{fl}" : fl # 'Formula Line' - THIS IS A STUPID HACK NEEDED BECAUSE TABLEAU STRIPS LEADING BLANKS FROM CSV VALUES
                               ]
        end
        #-- collect fields referenced in formula
        emit "# Calculated Fields: #{calculation.referencedFields.length}"
        calculation.referencedFields.each do |rf|
          emit "  referenced field       :: %12s  %s " % [ rf.dataSourceName, rf.uiname ]
          @csvFormulaFields <<  [ 
                                  # bumps the running counter and uses the new value as the record's first column
                                  @referencedFieldsCount += 1,
                                  @twb.name,
                                  # @modTime,
                                  rf.dataSourceName, # ds.uiname,
                                  calcField.uiname,
                                  calculation.formulaFlat,
                                  calculation.formulaFlatResolved,
                                  rf.name,
                                  rf.uiname,   #.uiname,
                                  '',  # rf.id,
                                  '',  #refFieldTable
                                ]
          refFieldNode    = Twb::Util::Graphnode.new(name: rf.uiname, id: rf.id, type: rf, properties: {:DataSource => ds.uiname})
          @nodes.add refFieldNode
          refFieldEdge     = Twb::Util::Graphedge.new(from: calcFieldNode, to: refFieldNode , relationship: 'references')
          @edges.add refFieldEdge
        end # calculation.referencedFields.each
    end # if calculation.has_formula
  end # ds.calculatedFields.each

  # With referencedFields empty (see note above) every calculated field id
  # ends up in dsRootFields.
  dsRootFields = calculatedFields - referencedFields
  @referencedFields.merge referencedFields
  @twbRootFields.merge dsRootFields
  if @doGraph
    # cypher     @twb.name
    # cypherPy   @twb.name
  end
  emit "#######################"
  #--
  #-- record calculated fields
  # NOTE(review): twbDirCSV is not read anywhere below in this method.
  twbDirCSV = @recordDir.nil? ? nil : @twbDir  
  emit "@@ record calculated fields  ds: #{ds.uiname}"
  @csvCalculatedFields.each do |r| 
    @csvCF << r
  end
  #-- record individual formula lines
  emit "@@ individual formula lines  ds: #{ds.uiname}"
  fieldFormulaLines.each do |ffl|
    @csvCFLs << ffl
  end
  #-- record formula-referenced fields
  # NOTE(review): @csvFormulaFields is not cleared in this method, yet all of
  # it is appended to @csvFF on each call - records collected for earlier
  # data sources look like they get written again; confirm against @csvFF.
  emit "@@ formula-referenced fields ds: #{ds.uiname}"
  @csvFormulaFields.each do |r| 
    @csvFF << r
  end
  #--
  return @imageFiles
end
rankRootFields(dotFile, dsRootFields) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 442
# Writes a DOT "{rank=same ...}" group containing every entry of
# +dsRootFields+ as a double-quoted name, emitting a trace line per entry.
#
# dotFile      - IO-like object receiving the DOT text
# dsRootFields - enumerable of entries; each is interpolated with to_s
#
# Returns nil.
def rankRootFields(dotFile, dsRootFields)
  dotFile.puts "\n  // Unreferenced (root) Calculated Fields -----------------------------------------"
  dotFile.puts "\n  {rank=same "
  dsRootFields.each do |root|
    emit "ROOT FIELD: #{root.class}  :: #{root}"
    dotFile.puts %(     "#{root}")
  end
  dotFile.puts '  }'
end
rankSame(dotFile, type, nodes) click to toggle source
# File lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb, line 428
# Writes a DOT "{rank=same ...}" group holding the ids of +nodes+ in sorted
# order, unless the type's string form is listed in @@unrankedTypes.
#
# dotFile - IO-like object receiving the DOT text
# type    - node type; shown in the section comment
# nodes   - enumerable of objects responding to +id+
#
# Leaves the sorted ids in @lines. Returns nil.
def rankSame(dotFile, type, nodes)
  return if @@unrankedTypes.include?(type.to_s)

  @lines = SortedSet.new
  nodes.each { |node| @lines.add(node.id) }
  dotFile.puts "\n  // '#{type}' --------------------------------------------------------------------"
  dotFile.puts "\n  {rank=same "
  @lines.each { |nodeId| dotFile.puts %(     "#{nodeId}") }
  dotFile.puts '  }'
end