class NHKore::Sifter

@author Jonathan Bradley Whited @since 0.2.0

Constants

DEFAULT_DIR
DEFAULT_FUTSUU_FILE
DEFAULT_FUTSUU_FILENAME
DEFAULT_YASASHII_FILE
DEFAULT_YASASHII_FILENAME

Attributes

articles[RW]
caption[RW]
filters[RW]
ignores[RW]
output[RW]

Public Class Methods

build_file(filename) click to toggle source
# File lib/nhkore/sifter.rb, line 30
# Builds the path of a file located inside +DEFAULT_DIR+.
#
# @param filename [String] name of the file to place under +DEFAULT_DIR+
# @return [String] +DEFAULT_DIR+ and +filename+ joined by +File.join+
def self.build_file(filename)
  File.join(DEFAULT_DIR,filename)
end
new(news) click to toggle source
# File lib/nhkore/sifter.rb, line 43
# Creates a sifter over a copy of the articles held by +news+.
#
# The caption and output start out as +nil+; the filters and ignores
# start out as empty hashes.
#
# @param news [#articles] object whose +articles+ responds to +values+
def initialize(news)
  article_map = news.articles

  @articles = article_map.values.dup
  @caption = nil
  @filters,@ignores = {},{}
  @output = nil
end

Public Instance Methods

build_header() click to toggle source
# File lib/nhkore/sifter.rb, line 51
# Builds the column titles for every column that is not ignored.
#
# The order is always: Frequency, Word, Kana, English, Definition.
#
# @return [Array<String>] titles of the columns whose key is not set in +@ignores+
def build_header
  titles = {
    freq: 'Frequency',
    word: 'Word',
    kana: 'Kana',
    eng: 'English',
    defn: 'Definition',
  }

  return titles.reject { |key,_title| @ignores[key] }.values
end
build_rows(words) click to toggle source
# File lib/nhkore/sifter.rb, line 63
# Converts each word into a row using {#build_word_row}.
#
# @param words [Enumerable] words to convert, in output order
# @return [Array<Array>] one row per word, in the same order as +words+
def build_rows(words)
  return words.map { |word| build_word_row(word) }
end
build_word_row(word) click to toggle source
# File lib/nhkore/sifter.rb, line 73
# Builds a single row from +word+, leaving out every ignored column.
#
# The values are ordered as: freq, word, kana, eng, defn.
#
# @param word [#freq,#word,#kana,#eng,#defn] word to read the values from
# @return [Array] values of the columns whose key is not set in +@ignores+
def build_word_row(word)
  columns = %i[freq word kana eng defn]
  kept = columns.reject { |column| @ignores[column] }

  return kept.map { |column| word.public_send(column) }
end
compare_empty_str(str1,str2) click to toggle source
# File lib/nhkore/sifter.rb, line 363
# Compares two strings only by whether +Util.empty_web_str?+ reports them
# as empty, so that a non-empty string sorts before an empty one.
#
# @param str1 [String,nil] string belonging to the first word
# @param str2 [String,nil] string belonging to the second word
# @return [Integer] -1 if only +str1+ is non-empty, 1 if only +str2+ is
#   non-empty, else 0
def compare_empty_str(str1,str2)
  blank1 = Util.empty_web_str?(str1)
  blank2 = Util.empty_web_str?(str2)

  return -1 if !blank1 && blank2 # Bubble word1 to top
  return 1 if blank1 && !blank2 # Bubble word2 to top

  return 0 # Further comparison needed
end
filter?(article) click to toggle source
# File lib/nhkore/sifter.rb, line 85
# Checks whether +article+ should be filtered out (skipped).
#
# With no filters set, nothing is filtered out. Otherwise, an article is
# filtered out if any of these is true:
# - a datetime filter is set and the article's datetime is +nil+ or lies
#   outside of the +:from+..+:to+ range;
# - a title filter is set and the article's title does not include it;
# - a URL filter is set and the article's URL does not include it.
#
# Before the title/URL comparison, the article's text gets the same
# +:unspace+/+:uncase+ treatment that is recorded in the filter.
#
# @param article [#datetime,#title,#url] article to check
# @return [Boolean] +true+ if the article should be skipped
def filter?(article)
  return false if @filters.empty?

  range = @filters[:datetime]

  unless range.nil?
    stamp = article.datetime

    return true if stamp.nil? || stamp < range[:from] || stamp > range[:to]
  end

  # The title & URL filters share the same rules, only the attribute differs.
  %i[title url].each do |field|
    rule = @filters[field]

    next if rule.nil?

    text = article.public_send(field).to_s
    text = Util.unspace_web_str(text) if rule[:unspace]
    text = text.downcase if rule[:uncase]

    return true unless text.include?(rule[:filter])
  end

  return false
end
filter_by_datetime(datetime_filter=nil,from: nil,to: nil) click to toggle source
# File lib/nhkore/sifter.rb, line 118
# Sets the datetime filter, so that only articles within +from+..+to+
# are kept by {#filter?}.
#
# The keyword args take priority; +datetime_filter+ only fills in the
# ones that are +nil+. If +datetime_filter+ responds to +[]+, index 0 is
# used for +from+ and index 1 for +to+; else, the value itself is used
# for both. If only one end is known, it is used for both ends. Known
# ends are passed through +Util.jst_time+ before being stored.
#
# If neither end is known, no filter is set.
#
# @param datetime_filter [#[],Object,nil] a from/to pair or a single value
# @param from [Object,nil] start of the range
# @param to [Object,nil] end of the range
# @return [self]
def filter_by_datetime(datetime_filter=nil,from: nil,to: nil)
  unless datetime_filter.nil?
    indexable = datetime_filter.respond_to?(:[])

    # If out-of-bounds, just nil.
    from = (indexable ? datetime_filter[0] : datetime_filter) if from.nil?
    to = (indexable ? datetime_filter[1] : datetime_filter) if to.nil?
  end

  # A single known end is used for both ends.
  from = to if from.nil?
  to = from if to.nil?

  from = Util.jst_time(from) unless from.nil?
  to = Util.jst_time(to) unless to.nil?

  unless [from,to].flatten.compact.empty?
    @filters[:datetime] = {from: from,to: to}
  end

  return self
end
filter_by_title(title_filter,uncase: true,unspace: true) click to toggle source
# File lib/nhkore/sifter.rb, line 145
# Sets the title filter, so that only articles whose title includes
# +title_filter+ are kept by {#filter?}.
#
# The filter is stored together with the +uncase+/+unspace+ flags.
#
# @param title_filter [String] text that a title must include
# @param uncase [Boolean] if true, the filter is stored downcased
# @param unspace [Boolean] if true, the filter is first passed through
#   +Util.unspace_web_str+
# @return [self]
def filter_by_title(title_filter,uncase: true,unspace: true)
  needle = unspace ? Util.unspace_web_str(title_filter) : title_filter
  needle = needle.downcase if uncase

  @filters[:title] = {filter: needle,uncase: uncase,unspace: unspace}

  return self
end
filter_by_url(url_filter,uncase: true,unspace: true) click to toggle source
# File lib/nhkore/sifter.rb, line 154
# Sets the URL filter, so that only articles whose URL includes
# +url_filter+ are kept by {#filter?}.
#
# The filter is stored together with the +uncase+/+unspace+ flags.
#
# @param url_filter [String] text that a URL must include
# @param uncase [Boolean] if true, the filter is stored downcased
# @param unspace [Boolean] if true, the filter is first passed through
#   +Util.unspace_web_str+
# @return [self]
def filter_by_url(url_filter,uncase: true,unspace: true)
  needle = unspace ? Util.unspace_web_str(url_filter) : url_filter
  needle = needle.downcase if uncase

  @filters[:url] = {filter: needle,uncase: uncase,unspace: unspace}

  return self
end
ignore(key) click to toggle source
# File lib/nhkore/sifter.rb, line 163
def ignore(key)
  @ignores[key] = true

  return self
end
put_csv!() click to toggle source

Unlike the HTML, JSON, and YAML output, the generated CSV does not include the {caption}.

# File lib/nhkore/sifter.rb, line 170
# Sifts the words and stores them in +@output+ as CSV: first the header
# line, then one line per word.
#
# The caption is not part of the CSV.
#
# @return [String] the generated CSV (also stored in +@output+)
def put_csv!
  require 'csv' # Only load if this format is actually used

  words = sift

  lines = [build_header]
  lines.concat(words.map { |word| build_word_row(word) })

  @output = CSV.generate(headers: :first_row,write_headers: true) do |csv|
    lines.each { |line| csv << line }
  end

  return @output
end
put_html!() click to toggle source
# File lib/nhkore/sifter.rb, line 186
    # Sifts the words and stores them in +@output+ as a standalone HTML
    # page: a heading with the caption, followed by a table with a header
    # row and one row per word.
    #
    # Both the caption and the word cells are passed through
    # +Util.escape_html+, so that text containing +<+, +>+, or +&+ cannot
    # break (or inject) markup. The column titles from {#build_header}
    # are written as is.
    #
    # @return [String] the generated HTML (also stored in +@output+)
    def put_html!
      words = sift

      # Escape the caption like the word cells; a nil caption becomes ''.
      caption = Util.escape_html(@caption.to_s)

      @output = ''.dup

      @output << <<~HTML
        <!DOCTYPE html>
        <html lang="ja">
        <head>
        <meta charset="utf-8">
        <title>NHKore</title>
        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Noto+Serif+JP&amp;display=fallback">
        <style>
        body {
          background-color: #FCFBF9;
          color: #333333;
          font-family: 'Noto Serif JP',Verdana,sans-serif;
        }
        h1 {
          color: #737373;
        }
        table {
          border-collapse: collapse;
          table-layout: fixed;
          width: 100%;
        }
        tr:nth-child(even) {
          background-color: #A5C7ED;
        }
        tr:hover {
          background-color: #FFDDCA;
        }
        td,th {
          border: 1px solid #333333;
          padding: 8px;
          text-align: left;
        }
        th {
          background-color: #082A8E;
          color: #FCFBF9;
        }
        td {
          vertical-align: top;
        }
        td:nth-child(1) {
          padding-right: 1em;
          text-align: right;
        }
        </style>
        </head>
        <body>
        <h1>NHKore</h1>
        <h2>#{caption}</h2>
        <table>
      HTML

      # If have too few or too many '<col>', invalid HTML.
      @output << %Q(<col style="width:6em;">\n) unless @ignores[:freq]
      @output << %Q(<col style="width:17em;">\n) unless @ignores[:word]
      @output << %Q(<col style="width:17em;">\n) unless @ignores[:kana]
      @output << %Q(<col style="width:5em;">\n) unless @ignores[:eng]
      @output << "<col>\n" unless @ignores[:defn] # No width for defn, fills rest of page

      # Header row.
      @output << '<tr>'

      build_header.each do |h|
        @output << "<th>#{h}</th>"
      end

      @output << "</tr>\n"

      # One row per word.
      words.each do |word|
        @output << '<tr>'

        build_word_row(word).each do |w|
          @output << "<td>#{Util.escape_html(w.to_s)}</td>"
        end

        @output << "</tr>\n"
      end

      @output << <<~HTML
        </table>
        </body>
        </html>
      HTML

      return @output
    end
put_json!() click to toggle source
# File lib/nhkore/sifter.rb, line 276
    # Sifts the words and stores them in +@output+ as JSON: an object
    # with the caption, the header, and an array of word rows.
    #
    # The JSON is assembled by hand, so that each word row is put on its
    # own line.
    #
    # @return [String] the generated JSON (also stored in +@output+)
    def put_json!
      require 'json' # Only load if this format is actually used

      words = sift

      @output = ''.dup

      @output << <<~JSON
        {
        "caption": #{JSON.generate(@caption)},
        "header": #{JSON.generate(build_header)},
        "words": [
      JSON

      rows = words.map { |word| "  #{JSON.generate(build_word_row(word))}" }

      # Commas go between the rows only, never after the last one.
      @output << rows.join(",\n") << "\n" unless rows.empty?

      @output << "]\n}\n"

      return @output
    end
put_yaml!() click to toggle source
# File lib/nhkore/sifter.rb, line 303
# Sifts the words and stores them in +@output+ as YAML: a mapping with
# the caption, the header, and the word rows.
#
# The YAML is dumped with +Util.dump_yaml+, using a Psychgus styler that
# puts the header sequence into flow style.
#
# @return [String] the generated YAML (also stored in +@output+)
def put_yaml!
  require 'psychgus' # Only load if this format is actually used

  words = sift

  data = {
    caption: @caption,
    header: build_header,
    words: build_rows(words),
  }

  # Styles the sequence whose parent has the value 'header' as flow.
  flow_header_styler = Class.new do
    include Psychgus::Styler

    def style_sequence(sniffer,node)
      parent = sniffer.parent
      in_header = !parent.nil? && parent.node.respond_to?(:value) && parent.value == 'header'

      node.style = Psychgus::SEQUENCE_FLOW if in_header
    end
  end

  # Put each Word on one line (flow/inline style).
  @output = Util.dump_yaml(data,flow_level: 4,stylers: flow_header_styler.new)

  return @output
end
sift() click to toggle source
# File lib/nhkore/sifter.rb, line 332
# Merges the words of every article that is not filtered out (see
# {#filter?}) into one new +Article+, and then sorts the merged words.
#
# The words are ordered by:
# 1. freq DESC (most frequent words to top);
# 2. words that have a definition first;
# 3. word ASC;
# 4. words that have kana first;
# 5. kana ASC;
# 6. definition length DESC (longer definitions first);
# 7. definition ASC.
#
# @return [Array] the sorted words
def sift
  merged = Article.new

  @articles.reject { |article| filter?(article) }.each do |article|
    article.words.each_value do |word|
      merged.add_word(word,use_freq: true)
    end
  end

  sifted = merged.words.values

  # Each rule is only consulted if all of the rules before it were a tie.
  sifted.sort! do |a,b|
    order = (b.freq <=> a.freq)

    order = compare_empty_str(a.defn,b.defn) if order == 0
    order = (a.word.to_s <=> b.word.to_s) if order == 0
    order = compare_empty_str(a.kana,b.kana) if order == 0
    order = (a.kana.to_s <=> b.kana.to_s) if order == 0
    order = (b.defn.to_s.length <=> a.defn.to_s.length) if order == 0
    order = (a.defn.to_s <=> b.defn.to_s) if order == 0

    order
  end

  return sifted
end
style_sequence(sniffer,node) click to toggle source
# File lib/nhkore/sifter.rb, line 317
# Styles a sequence as flow (inline) if its parent has the value 'header'.
#
# @param sniffer [#parent] sniffer used to look up the parent of +node+
# @param node [#style=] sequence node to style
def style_sequence(sniffer,node)
  parent = sniffer.parent
  in_header = !parent.nil? && parent.node.respond_to?(:value) && parent.value == 'header'

  node.style = Psychgus::SEQUENCE_FLOW if in_header
end
to_s() click to toggle source
# File lib/nhkore/sifter.rb, line 376
# @return [String] the output of the last +put_*!+ call as a string;
#   an empty string if +@output+ is +nil+
def to_s
  @output.to_s
end