class ZombieWriter::MachineLearning
Attributes
labels[R]
lsi[R]
paragraph_data[R]
plain_to_markdown[R]
renderer[R]
Public Class Methods
new()
click to toggle source
# File lib/zombie_writer.rb, line 40
#
# Sets up an empty learner:
#
# * +lsi+               - a fresh ClassifierReborn::LSI index
# * +labels+            - empty Array (add_string appends the rendered text of
#                         every paragraph here)
# * +paragraph_data+    - empty Hash (add_string stores original content =>
#                         citation)
# * +plain_to_markdown+ - empty Hash (add_string stores rendered text =>
#                         original content)
# * +renderer+          - Redcarpet::Markdown driven by CustomStripDownRender
def initialize
  @lsi               = ClassifierReborn::LSI.new
  @labels            = []
  @paragraph_data    = {}
  @plain_to_markdown = {}
  @renderer          = Redcarpet::Markdown.new(CustomStripDownRender)
end
Public Instance Methods
add_string(paragraph)
click to toggle source
# File lib/zombie_writer.rb, line 48
#
# Registers one paragraph with the learner.
#
# paragraph - an object indexed with +:content+; the whole object is also
#             handed to ZombieWriter.citation_constructor.
#
# The content is passed through +renderer+ and the rendered text is what gets
# recorded in +labels+ and added to the LSI index. Two lookups are kept so the
# original can be recovered later: rendered text => original content, and
# original content => citation.
#
# Returns whatever +lsi.add_item+ returns.
def add_string(paragraph)
  original = paragraph[:content]
  rendered = renderer.render(original)

  plain_to_markdown[rendered] = original
  paragraph_data[original] = ZombieWriter.citation_constructor(paragraph)

  labels << rendered
  lsi.add_item(rendered)
end
generate_articles()
click to toggle source
# File lib/zombie_writer.rb, line 61
#
# Clusters every paragraph added so far and renders one article per cluster.
#
# For each cluster two texts are assembled with generate_article:
#
# * the rendered (stripped-down) labels, passed to ZombieWriter.header together
#   with the cluster id as a String;
# * the original content of each point followed by its citation, used as the
#   article body.
#
# Returns an Array holding the result of ZombieWriter.formatted_article for
# each cluster, or an empty Array when no paragraphs have been added.
def generate_articles
  # With nothing added there is nothing to cluster; skip the clusterer
  # entirely instead of requesting one cluster over an empty data set.
  return [] if labels.empty?

  cluster_count = determine_number_of_clusters(labels.length)

  generate_clusters(clusters: cluster_count, runs: 10).map do |cluster|
    text_for_header = generate_article(cluster, &:label)

    body = generate_article(cluster) do |point|
      # point.label is the rendered text; map it back to the original content
      # and then to that content's citation.
      content = plain_to_markdown[point.label]
      "#{content}#{paragraph_data[content]}"
    end

    header = ZombieWriter.header(cluster.id.to_s, text_for_header)
    ZombieWriter.formatted_article(header, body)
  end
end
Private Instance Methods
determine_number_of_clusters(number_of_articles)
click to toggle source
# File lib/zombie_writer.rb, line 92
#
# Chooses how many clusters to request: one per five items, rounded down, and
# never fewer than one.
#
# number_of_articles - numeric count of items to be clustered.
#
# Returns an Integer >= 1.
def determine_number_of_clusters(number_of_articles)
  # Integer division already rounds down; the trailing #floor only matters if
  # a Float is passed in. No detour through Float, which would lose precision
  # for very large integers.
  [1, (number_of_articles / 5).floor].max
end
generate_article(cluster) { |point| ... }
click to toggle source
# File lib/zombie_writer.rb, line 96
#
# Builds the text of one article from a cluster.
#
# cluster - object responding to +points+ with an Array-like collection.
# block   - called once per point; its return value becomes that point's
#           paragraph.
#
# Returns a String: the block results joined by a blank line ("\n\n").
def generate_article(cluster, &block)
  # Forward the caller's block directly instead of re-yielding from a wrapper.
  cluster.points.map(&block).join("\n\n")
end
generate_clusters(clusters:, runs:)
click to toggle source
# File lib/zombie_writer.rb, line 83
#
# Runs k-means over the LSI vectors of every label.
#
# clusters - number of clusters, passed as the first argument to
#            KMeansClusterer.run.
# runs     - forwarded to KMeansClusterer.run as +runs:+.
#
# Returns the +clusters+ of the KMeansClusterer result.
def generate_clusters(clusters:, runs:)
  # NOTE(review): reads the LSI index's @items instance variable directly; it
  # is looked up by label below, so presumably a Hash keyed by the strings
  # given to add_item - confirm against the ClassifierReborn version in use.
  nodes_by_label = lsi.instance_variable_get(:"@items")

  vectors = labels.map { |label| nodes_by_label[label].lsi_norm.to_a }

  KMeansClusterer.run(clusters, vectors, labels: labels, runs: runs).clusters
end