module ChinesePhrases

Constants

VERSION

Public Class Methods

get_examples(query, params)
# File lib/chinese_phrases.rb, line 65
# Fetches example sentences for +query+ from the dict.naver.com "linedict"
# example-search endpoint (requested as JSONP) and returns the ones that
# pass the length and count filters.
#
# query  - String to search for; it is URL-escaped before being sent.
# params - Hash; the keys read here are:
#          :page       - result page to request
#          :page_size  - number of examples requested from the API
#          :max_length - examples whose text is this long or longer are dropped
#          :max_per    - maximum number of examples returned; a negative
#                        value disables the cap, 0 returns no examples
#
# Returns an Array of example Hashes as decoded from the response JSON.
# Returns [] (after printing a diagnostic) when the HTTP request is not
# successful, the JSONP callback wrapper is missing, or the payload is not
# valid JSON.
def get_examples(query, params)
  query_escaped = CGI.escape(query)
  callback = "jQuery1111013304390430117385_1567195383336"

  puts "Fetching examples for #{query}"

  url = "https://dict.naver.com/linedict/cnen/example/search.dict?callback=#{callback}&query=#{query_escaped}&page=#{params[:page]}&page_size=#{params[:page_size]}&examType=normal&fieldType=&author=&country=&ql=default&format=json&platform=isPC&_=1567195383337"

  response = HTTParty.get(url)

  unless response.success?
    puts "Error from server #{response.code}"
    puts "Server response:"
    puts response.body
    return []
  end

  # Work on the raw body: the payload is JSONP ("callback(<json>)"), not JSON.
  body = response.body.to_s

  # Capture everything between the callback's opening paren and the LAST
  # closing paren, so a trailing ";" or newlines inside the payload are fine.
  wrapped = body.match(/#{Regexp.escape(callback)}\s*\((.*)\)/m)

  if wrapped.nil?
    puts "Callback not found, json error from server. Outputting response."
    puts body
    return []
  end

  begin
    data = JSON.parse(wrapped[1])
  rescue JSON::ParserError => e
    puts "Could not parse json from server: #{e.message}"
    return []
  end

  # A payload without an "exampleList" array is treated as "no examples".
  example_list = data.is_a?(Hash) ? Array(data["exampleList"]) : []

  puts "Found #{example_list.count} examples"

  # keep only examples whose text is shorter than :max_length
  max_length = params[:max_length]
  examples = example_list.select do |item|
    text = item["example"]
    text && text.length < max_length
  end

  # Array#first(n) handles n == 0 correctly; the range form 0..(n - 1)
  # degenerates to 0..-1 (the whole array) when n is 0.
  max_per = params[:max_per]
  examples = examples.first(max_per) if max_per > -1

  puts "Filtered down to #{examples.count} examples"

  examples
end
run(input_file, options = {})
# File lib/chinese_phrases.rb, line 11
# Reads the words in the first column of +input_file+, looks up example
# sentences for each one via get_examples, and writes all collected
# examples to a CSV file (one row per example: translation, example, pinyin).
#
# input_file - path of the CSV file whose first column holds the words to
#              query. Rows whose first cell is missing or blank are skipped.
# options    - Hash of optional settings:
#              :output_file - path of the CSV to write (default "output_phrases.csv")
#              :trad        - when truthy, each example is passed through
#                             Tradsim.to_trad before being written
#              :page        - forwarded to get_examples as :page (default 1)
#              :page_size   - forwarded as :page_size (default 50)
#              :max_len     - forwarded as :max_length (default 15)
#              :max_per     - forwarded as :max_per (default 10)
#
# Progress is printed to stdout. Returns nil.
def run(input_file, options = {})
  input_index = 0 # column of the input CSV that holds the word to query
  output_file = options[:output_file] || "output_phrases.csv"
  to_trad = options[:trad]

  params = {
    page: options[:page] || 1, # page to check on source
    page_size: options[:page_size] || 50, # number of examples from api call
    max_length: options[:max_len] || 15, # only accept examples shorter than this
    max_per: options[:max_per] || 10, # only accept this number of examples per word
  }

  puts "Running with params #{params}"

  # read input file to create list of words to query; every word is passed
  # through Tradsim.to_sim before it is queried
  query_list = []
  CSV.foreach(input_file) do |row|
    word = row[input_index]
    # blank lines and empty first cells yield nil/"" here; there is nothing to query
    next if word.nil? || word.strip.empty?

    query_list << Tradsim.to_sim(word)
  end

  if query_list.empty?
    puts "CSV file is empty!"
  elsif query_list.count > 1000
    puts "[WARNING] CSV file has over 1000 lines, this can take a long time and/or fail! Consider splitting the file."
  end

  # query each word individually and combine to total list
  total_examples = query_list.flat_map { |q| get_examples(q, params) }

  # output each example to file
  CSV.open(output_file, "w") do |csv|
    total_examples.each do |a|
      example = a["example"]
      example = Tradsim.to_trad(example) if to_trad

      pinyin = a["pinyin"]
      # NOTE(review): "recentTrslation" is presumably the key spelling used by
      # the upstream response — confirm before "fixing" it.
      translation = a["recentTrslation"]

      puts "Writing #{translation} #{example} #{pinyin}"

      csv << [translation, example, pinyin]
    end
  end

  puts "Wrote #{output_file}"
end