class ZombieWriter::MachineLearning

Attributes

labels[R]
lsi[R]
paragraph_data[R]
plain_to_markdown[R]
renderer[R]

Public Class Methods

new()
# File lib/zombie_writer.rb, line 40
# Builds an empty model.
#
# State set up here:
# * @labels            - array that add_string appends rendered paragraph text to
# * @paragraph_data    - hash filled by add_string (original content => citation)
# * @plain_to_markdown - hash filled by add_string (rendered text => original content)
# * @renderer          - Redcarpet::Markdown instance built with CustomStripDownRender
# * @lsi               - a fresh ClassifierReborn::LSI index
def initialize
  @labels = []
  @paragraph_data = {}
  @plain_to_markdown = {}

  @renderer = Redcarpet::Markdown.new(CustomStripDownRender)
  @lsi = ClassifierReborn::LSI.new
end

Public Instance Methods

add_string(paragraph)
# File lib/zombie_writer.rb, line 48
# Registers one paragraph with the model.
#
# The value under paragraph[:content] is passed through `renderer`; the rendered
# text becomes the paragraph's label and is what gets added to the LSI index.
# Two lookup tables are updated so the original content and its citation can be
# recovered from the label later:
#   plain_to_markdown[rendered text] -> original content
#   paragraph_data[original content] -> ZombieWriter.citation_constructor(paragraph)
#
# @param paragraph [#[]] must answer to [:content]; it is handed unchanged to
#   ZombieWriter.citation_constructor
# @return whatever lsi.add_item returns
def add_string(paragraph)
  markdown = paragraph[:content]
  plain_text = renderer.render(markdown)

  # Keep the original write order: label lookup first, then the citation.
  plain_to_markdown.store(plain_text, markdown)
  paragraph_data.store(markdown, ZombieWriter.citation_constructor(paragraph))

  labels.push(plain_text)
  lsi.add_item(plain_text)
end
generate_articles()
# File lib/zombie_writer.rb, line 61
# Clusters every paragraph added so far and turns each cluster into an article.
#
# The number of clusters comes from determine_number_of_clusters(labels.length);
# generate_clusters is then asked for that many clusters with runs: 10.
# For each cluster two bodies are assembled with generate_article:
#   * a plain body made of the point labels, passed to ZombieWriter.header
#     together with the cluster id (as a string);
#   * a cited body where each label is mapped back to its original content via
#     plain_to_markdown and suffixed with its citation from paragraph_data.
#
# @return [Array] one ZombieWriter.formatted_article result per cluster
def generate_articles
  cluster_count = determine_number_of_clusters(labels.length)

  generate_clusters(clusters: cluster_count, runs: 10).map do |cluster|
    plain_body = generate_article(cluster, &:label)

    cited_body = generate_article(cluster) do |point|
      markdown = plain_to_markdown[point.label]
      "#{markdown}#{paragraph_data[markdown]}"
    end

    heading = ZombieWriter.header(cluster.id.to_s, plain_body)
    ZombieWriter.formatted_article(heading, cited_body)
  end
end

Private Instance Methods

determine_number_of_clusters(number_of_articles)
# File lib/zombie_writer.rb, line 92
# Picks how many clusters to build: one per five items, rounded down, and
# never fewer than one.
#
# @param number_of_articles [Numeric] how many paragraphs have been added
# @return [Integer] the cluster count, always >= 1
def determine_number_of_clusters(number_of_articles)
  # Integer division already rounds down, so the earlier `.to_f` round-trip was
  # redundant and could lose precision for very large counts. `floor` is kept
  # so a non-integer argument still produces an Integer.
  [1, (number_of_articles / 5).floor].max
end
generate_article(cluster) { |point| ... }
# File lib/zombie_writer.rb, line 96
# Builds one article body from a cluster.
#
# Each element of cluster.points is yielded to the caller's block; the block's
# results are joined with a blank line ("\n\n") between them.
#
# @param cluster [#points] object exposing the points to render
# @yield [point] called once per point, in order
# @return [String] the joined block results ("" when there are no points)
def generate_article(cluster, &block)
  rendered_points = cluster.points.map { |point| yield(point) }
  rendered_points.join("\n\n")
end
generate_clusters(clusters:, runs:)
# File lib/zombie_writer.rb, line 83
# Runs k-means over the paragraphs added so far.
#
# For every label, the matching entry is looked up in the LSI object's @items
# instance variable and its `lsi_norm` is converted to an array; those arrays
# are the data points handed to KMeansClusterer.run, with `labels` supplied as
# the point labels.
#
# NOTE(review): this reads @items directly via instance_variable_get, so it
# depends on ClassifierReborn::LSI internals - presumably a hash keyed by the
# indexed string; confirm when upgrading that gem.
#
# @param clusters [Integer] number of clusters to request
# @param runs [Integer] forwarded to KMeansClusterer.run as `runs:`
# @return the `clusters` of the KMeansClusterer.run result
def generate_clusters(clusters:, runs:)
  nodes_by_text = lsi.instance_variable_get(:@items)
  vectors = labels.map { |text| nodes_by_text[text].lsi_norm.to_a }

  KMeansClusterer.run(clusters, vectors, labels: labels, runs: runs).clusters
end