class MediaArtsDb::Comic::Parse

Public Class Methods

parse_author(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 463
def parse_author(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  tbody = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: 「ID」部分のHTML構造の誤りにより要素番号をずらす必要がある。
  offset = 1
  result[:headings] = clip_text(tbody, 2, 2 + offset) # 標目
  result[:name] = clip_text(tbody, 2, 3 + offset) # 名称
  result[:name_kana] = clip_text(tbody, 2, 4 + offset)  # ヨミ
  result[:name_alphabet] = clip_text(tbody, 2, 5 + offset)  # ローマ字
  result[:reference_authors] = clip_authors(tbody, 2, 6 + offset)  # をも見よ参照
  result[:other_name] = clip_text(tbody, 2, 7 + offset) # 別名(表記ミス・ユレ、本名、新字旧字など)
  result[:birthday] = clip_text(tbody, 2, 8 + offset) # 生年月日(結成年月日)
  result[:death_date] = clip_text(tbody, 2, 9 + offset) # 没年月日

  result[:comic_work_quantity] = doc.css('body > article > div.sub > section:nth-child(1) > h3 > span').text
  result[:comic_works] = [] # 単行本化された作品 ※マンガ作品
  doc.css('body > article > div.sub > section:nth-child(1) table').each do |table|
    table.css('tr').each do |tr|
      next if tr.css('td').empty?
      content = {}
      # content[:author] = clip_text(tr, 2)
      result[:comic_works] << ComicWork.new(clip_id(tr, 1), content)
    end
  end

  result[:comic_title_quantity] = doc.css('body > article > div.sub > section:nth-child(2) > h3 > span').text
  result[:comic_titles] = []  # 単行本全巻
  doc.css('body > article > div.sub > section:nth-child(2) table').each do |table|
    table.css('tr').each do |tr|
      next if tr.css('td').empty?
      content = {}
      content[:title] = clip_text(tr, 1)
      # content[:author] = clip_text(tr, 2)
      content[:comic_quantity] = clip_text(tr, 3)
      result[:comic_titles] << ComicTitle.new(clip_id(tr, 1), content)
    end
  end

  # 資料、マンガ原画、その他の冊子、関連マンガ作品はサンプルが見つからないので未実装

  result
end
parse_booklet(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 559
def parse_booklet(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)

  tbody = doc.css('body > article > div.main > section:nth-child(1) > table > tbody')
  result[:comic_work] = ComicWork.new(clip_id(tbody, 4, 1), {}) # (マンガ)作品ID
  result[:category] = clip_text(tbody, 2, 2)  # 分類
  result[:title] = clip_text(tbody, 2, 3)  # 冊子名
  result[:title_kana] = clip_text(tbody, 2, 4)  # ヨミ
  result[:title_append] = clip_text(tbody, 2, 5)  # 冊子名追記
  result[:title_append_kana] = clip_text(tbody, 2, 6)  # ヨミ
  result[:volume] = clip_text(tbody, 2, 7) # 巻
  result[:volume_sort_number] = clip_text(tbody, 4, 7)  # 巻ソート
  result[:title_other] = clip_text(tbody, 2, 8) # 冊子名別版表示
  result[:introduction] = clip_text(tbody, 2, 9)  # 紹介文

  tbody = doc.css('body > article > div.main > section:nth-child(2) > table > tbody')
  result[:authority] = clip_text(tbody, 2, 1)  # 責任表示
  result[:authors] = clip_authors(tbody, 2, 2)  # 著者典拠ID
  result[:author] = clip_text(tbody, 2, 3)  # 作者・著者
  result[:author_kana] = clip_text(tbody, 4, 3)
  result[:original] = clip_text(tbody, 2, 4)  # 原作・原案
  result[:original_kana] = clip_text(tbody, 4, 4)
  result[:collaborator] = clip_text(tbody, 2, 5)  # 協力者
  result[:collaborator_kana] = clip_text(tbody, 4, 5)
  result[:headings] = clip_text(tbody, 2, 6) # 標目

  tbody = doc.css('body > article > div.main > section:nth-child(3) > table > tbody')
  result[:publisher] = clip_text(tbody, 2, 1) # 出版者名(サークル名)
  result[:series] = clip_text(tbody, 2, 3)  # シリーズ
  result[:series_kana] = clip_text(tbody, 2, 4) # ヨミ
  result[:series_number] = clip_text(tbody, 2, 5) # シリーズ番号
  result[:published_event] = clip_text(tbody, 2, 6) # 頒布イベント

  tbody = doc.css('body > article > div.main > section:nth-child(4) > table > tbody')
  result[:published_data] = clip_text(tbody, 2, 1)  # 初版発行年月日
  result[:price] = clip_text(tbody, 4, 1)
  result[:published_data_note] = clip_text(tbody, 2, 2) # 発行日備考
  result[:japan_book_number] = clip_text(tbody, 2, 3) # 全国書誌番号
  result[:format] = clip_text(tbody, 2, 4)  # 製本・造本形態
  result[:total_page] = clip_text(tbody, 2, 5)  # ページ数
  result[:size] = clip_text(tbody, 4, 5)
  result[:published_area] = clip_text(tbody, 2, 6)  # 発行地
  result[:publisher2] = clip_text(tbody, 4, 6)
  result[:langage] = clip_text(tbody, 2, 7) # 言語区分
  result[:rating] = clip_text(tbody, 2, 8)  # レイティング
  result[:tags] = clip_text(tbody, 2, 9)  # タグ
  result[:note] = clip_text(tbody, 2, 10)  # 備考

  result
end
parse_comic(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 261
def parse_comic(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  return result if doc.css('body > article > div.main > section:nth-child(1) > table').empty?

  # 前巻/次巻
  prev_id = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.prv > a'))
  result[:prev_comic] = prev_id ? Comic.new(prev_id) : nil
  next_id = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.nxt > a'))
  result[:next_comic] = next_id ? Comic.new(next_id) : nil

  # 基本情報
  tbody = doc.css('body > article > div.main > section:nth-child(1) > table > tbody')
  result[:comic_title] = ComicTitle.new(clip_id(tbody, 4, 1))
  result[:title] = clip_text(tbody, 2, 2)
  result[:title_kana] = clip_text(tbody, 2, 3)
  result[:title_append] = clip_text(tbody, 2, 4)
  result[:title_append_kana] = clip_text(tbody, 2, 5)
  result[:volume] = clip_text(tbody, 2, 6)
  result[:volume_sort_number] = clip_text(tbody, 4, 6)
  result[:volume_other_number] = clip_text(tbody, 2, 7)
  result[:introduction] = clip_text(tbody, 2, 8)

  # 著者表示
  tbody = doc.css('body > article > div.main > section:nth-child(2) > table > tbody')
  result[:responsible] = clip_text(tbody, 2, 1)
  result[:authors] = clip_authors(tbody, 2, 2)
  result[:headings] = clip_text(tbody, 2, 3)
  result[:auhtor] = clip_text(tbody, 2, 4)
  result[:auhtor_kana] = clip_text(tbody, 4, 4)
  result[:original_title] = clip_text(tbody, 2, 5)
  result[:original_title_kana] = clip_text(tbody, 4, 5)
  result[:collaborator] = clip_text(tbody, 2, 5)
  result[:collaborator_kana] = clip_text(tbody, 4, 6)

  # 出版者・レーベル
  tbody = doc.css('body > article > div.main > section:nth-child(3) > table > tbody')
  result[:publisher] = clip_text(tbody, 2, 1)
  result[:label] = clip_text(tbody, 2, 3)
  result[:label_kana] = clip_text(tbody, 2, 4)
  result[:label_number] = clip_text(tbody, 2, 5)
  result[:series] = clip_text(tbody, 2, 6)
  result[:series_kana] = clip_text(tbody, 4, 6)

  # その他
  tbody = doc.css('body > article > div.main > section:nth-child(4) > table > tbody')
  result[:published_date] = clip_text(tbody, 2, 1)
  result[:first_price] = clip_text(tbody, 4, 1)
  result[:isbn10] = clip_isbn10(tbody, 2, 2)
  result[:isbn13] = clip_isbn13(tbody, 2, 2)
  result[:japan_book_number] = clip_text(tbody, 2, 3)
  result[:total_page] = clip_text(tbody, 2, 4)
  result[:size] = clip_text(tbody, 4, 4)
  result[:langage] = clip_text(tbody, 2, 5)
  result[:published_area] = clip_text(tbody, 4, 5)
  result[:rating] = clip_text(tbody, 2, 6)
  result[:category] = clip_text(tbody, 2, 7)
  result[:tags] = clip_text(tbody, 2, 8)
  result[:note] = clip_text(tbody, 2, 9)

  result
end
parse_comic_title(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 210
def parse_comic_title(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  tbody = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: HTML構造の誤りにより要素番号をずらす必要がある。
  offset = 1
  result[:comic_work] = ComicWork.new(clip_id(tbody, 2, 2 + offset))
  result[:title] = clip_text(tbody, 2, 3 + offset)  # 単行本全巻名
  result[:title_kana] = clip_text(tbody, 2, 4 + offset) # 単行本全巻名 ヨミ
  result[:title_append] = clip_text(tbody, 2, 5 + offset) # 単行本全巻名 追記
  result[:title_append_kana] = clip_text(tbody, 2, 6 + offset)  # 単行本全巻名 追記 ヨミ
  result[:title_other] = clip_text(tbody, 2, 7 + offset)  # 単行本全巻名 別版表示
  # result[:total_comic_volume] = clip_text(tbody, 2, 8 + offset) # 単行本全巻数 comic_quantityと同値なのでコメントアウト
  result[:responsible] = clip_text(tbody, 2, 9 + offset)  # 責任表示
  result[:author] = clip_text(tbody, 2, 10 + offset) # 作者・著者
  result[:author_kana] = clip_text(tbody, 2, 11 + offset)  # 作者・著者 ヨミ
  result[:origina] = clip_text(tbody, 2, 12 + offset)  # 原作・原案
  result[:origina_kana] = clip_text(tbody, 2, 13 + offset) # 原作・原案 ヨミ
  result[:collaborator] = clip_text(tbody, 2, 14 + offset) # 協力者
  result[:collaborator_kana] = clip_text(tbody, 2, 15 + offset)  # 協力者 ヨミ
  result[:headings] = clip_text(tbody, 2, 16 + offset) # 標目
  result[:authors] = clip_authors(tbody, 2, 17 + offset)  # 著者典拠ID
  result[:label] = clip_text(tbody, 2, 18 + offset)  # 単行本レーベル
  result[:label_kana] = clip_text(tbody, 2, 19 + offset) # 単行本レーベル ヨミ
  result[:series] = clip_text(tbody, 2, 21 + offset) # シリーズ
  result[:series_kana] = clip_text(tbody, 2, 22 + offset)  # シリーズ ヨミ
  result[:publisher] = clip_text(tbody, 2, 23 + offset)  # 出版者名
  result[:published_area] = clip_text(tbody, 2, 25 + offset) # 出版地
  result[:size] = clip_text(tbody, 2, 26 + offset) # 縦の長さ×横の長さ
  result[:isbn] = clip_text(tbody, 2, 27 + offset) # ISBNなどのセットコード
  result[:langage] = clip_text(tbody, 2, 28 + offset)  # 言語区分
  result[:category] = clip_text(tbody, 2, 29 + offset) # 分類
  result[:rating] = clip_text(tbody, 2, 30 + offset) # レイティング
  result[:introduction] = clip_text(tbody, 2, 31 + offset) # 単行本全巻紹介文
  result[:tags] = clip_text(tbody, 2, 32 + offset) # 単行本全巻タグ
  result[:note] = clip_text(tbody, 2, 33 + offset) # 単行本全巻備考

  result[:comic_quantity] = doc.css('body > article > div.sub > section > nav > h3 > span').text
  result[:comics] = [] # 単行本
  doc.css('body > article > div.sub > section:nth-child(1) > table > tbody > tr').each do |tr|
    next if tr.css('td').empty?
    content = {}
    content[:title] = clip_text(tr, 1)
    content[:title_append] = clip_text(tr, 2)
    content[:volume] = clip_text(tr, 3)
    result[:comics] << Comic.new(clip_id(tr, 1), content)
  end

  result
end
parse_comic_work(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 159
def parse_comic_work(response_body)
  content = {}
  doc = Nokogiri::HTML.parse(response_body)
  tbody = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: HTML構造の誤りにより要素番号をずらす必要がある。
  offset = 1
  content[:title] = clip_text(tbody, 2, 2 + offset) # マンガ作品名
  content[:title_kana] = clip_text(tbody, 2, 3 + offset) # マンガ作品名ヨミ
  content[:sub_title] = clip_text(tbody, 2, 4 + offset) # 別題・副題・原題
  content[:title_alphabet] = clip_text(tbody, 2, 5 + offset) # ローマ字表記
  # content[:author] = clip_text(tbody, 2, 6 + offset) # 著者(責任表示) # author.nameを参照するためコメントアウト
  content[:authors] = clip_authors(tbody, 2, 7 + offset) # 著者典拠ID
  content[:published_date] = clip_text(tbody, 2, 8 + offset) # 公表時期
  content[:source] = clip_text(tbody, 2, 9 + offset) # 出典(初出)
  content[:introduction] = clip_text(tbody, 2, 10 + offset) # マンガ作品紹介文・解説
  content[:category] = clip_text(tbody, 2, 11 + offset) # 分類
  content[:tags] = clip_text(tbody, 2, 12 + offset) # タグ
  content[:rating] = clip_text(tbody, 2, 13 + offset) # レイティング

  content[:comic_title_quantity] = doc.css('body > article > div.sub > section:nth-child(1) > h3 > span').text
  content[:comic_titles] = [] # 単行本全巻
  doc.css('body > article > div.sub > section:nth-child(1) table').each do |table|
    table.css('tr').each do |tr|
      next if tr.css('td').empty?
      comic_title_content = {}
      comic_title_content[:title] = clip_text(tr, 1)
      comic_title_content[:author] = clip_text(tr, 2)
      comic_title_content[:total_comic_volume] = clip_text(tr, 3)
      content[:comic_titles] << ComicTitle.new(clip_id(tr, 1), comic_title_content)
    end
  end

  content[:magazine_work_quantity] = doc.css('body > article > div.sub > section:nth-child(2) > h3 > span').text
  content[:magazine_works] = []  # 雑誌掲載作品
  doc.css('body > article > div.sub > section:nth-child(2) table').each do |table|
    table.css('tr').each do |tr|
      next if tr.css('td').empty?
      magazine_work_content = {}
      magazine_work_content[:title] = clip_text(tr, 1)
      magazine_work_content[:author] = clip_text(tr, 2)
      magazine_work_content[:magazine_title] = clip_text(tr, 3)
      magazine_work_content[:published_date] = clip_text(tr, 4)
      content[:magazine_works] << MagazineWork.new(clip_id(tr, 1), magazine_work_content)
    end
  end

  # 資料、マンガ原画、その他の冊子、関連マンガ作品はサンプルが見つからないので未実装

  content
end
parse_magazine(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 397
def parse_magazine(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  return result if doc.css('body > article > div.main > section:nth-child(1) > table').empty?

  # Next/Prev
  prev_id = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.prv > a'))
  result[:prev_magazine] = prev_id ? Magazine.new(prev_id) : nil
  next_id = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.nxt > a'))
  result[:next_magazine] = next_id ? Magazine.new(next_id) : nil

  # 基本情報
  tbody = doc.css('body > article > div.main > section:nth-child(1) > table > tbody')
  # NOTE: 「雑誌略号ID」部分のHTML構造の誤りにより要素番号をずらす必要がある。
  offset = 2
  result[:sub_title] = clip_text(tbody, 2, 3 + offset) # サブタイトル
  result[:sub_title_kana] = clip_text(tbody, 2, 4 + offset)  # サブタイトルヨミ
  result[:display_date] = clip_text(tbody, 2, 5 + offset)  # 表示年月日
  result[:display_date_merger] = clip_text(tbody, 2, 6 + offset) # 表示月日(合併 + offset)
  result[:published_date] = clip_text(tbody, 2, 7 + offset)  # 発行年月日
  result[:published_date_merger] = clip_text(tbody, 2, 8 + offset) # 発行月日(合併 + offset)
  result[:release_date] = clip_text(tbody, 2, 9 + offset)  # 発売年月日
  result[:display_volume] = clip_text(tbody, 2, 10 + offset)  # 表示号数
  result[:display_merger_volume] = clip_text(tbody, 2, 11 + offset)  # 表示合併号数
  result[:display_sub_volume] = clip_text(tbody, 2, 12 + offset)  # 補助号数
  result[:volume] = clip_text(tbody.css("tr:nth-child(#{13 + offset}) table > tbody > td:nth-child(2)"))  # 巻
  result[:volume2] = clip_text(tbody.css("tr:nth-child(#{13 + offset}) table > tbody > td:nth-child(4)"))  # 号
  result[:volume3] = clip_text(tbody.css("tr:nth-child(#{13 + offset}) table > tbody > td:nth-child(6)"))  # 通巻

  # 出版者、ページ数、価格
  tbody = doc.css('body > article > div.main > section:nth-child(2) > table > tbody')
  # NOTE: 「出版者名」部分のHTML構造の誤りにより要素番号をずらす必要がある。
  offset = 1
  # result[:publisher] = clip_text(tbody, 2, 1)  # 出版者名  # 取得できない
  result[:publisher2] = clip_text(tbody, 2, 3 + offset) # 発行人
  result[:publisher3] = clip_text(tbody, 2, 4 + offset) # 編集人
  result[:total_page] = clip_text(tbody, 2, 5 + offset) # ページ数
  result[:binding] = clip_text(tbody, 2, 6 + offset)  # 製本
  result[:category] = clip_text(tbody, 2, 7 + offset) # 分類
  result[:rating] = clip_text(tbody, 2, 8 + offset) # レイティング
  result[:size] = clip_text(tbody, 2, 9 + offset) # 縦の長さx横の長さ
  result[:price] = clip_text(tbody, 2, 10 + offset)  # 価格
  result[:magazine_code] = clip_text(tbody, 2, 11 + offset)  # 雑誌コード
  result[:tags] = clip_text(tbody, 2, 12 + offset) # タグ
  result[:note] = clip_text(tbody, 2, 13 + offset) # 備考

  # 目次
  result[:contents] = []
  doc.css('body > article > div.sub > section:nth-child(2) > table > tbody > tr').each do |tr|
    next if tr.css('td').empty?
    content = {}
    content[:category] = clip_text(tr, 1)
    # content[:title] = clip_text(tr.css('td:nth-child(2)'))  # magazine_work.titleを参照する
    content[:magazine_work] = MagazineWork.new(clip_id(tr, 2))
    # content[:author] = clip_text(tr, 3)  # magazine_work.authorを参照する
    content[:sub_title] = clip_text(tr, 4)
    content[:start_page] = clip_text(tr, 5)
    content[:total_page] = clip_text(tr, 6)
    content[:note] = clip_text(tr, 7)
    content[:format] = clip_text(tr, 8)
    result[:contents] << content
  end

  result
end
parse_magazine_title(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 355
def parse_magazine_title(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  tbody = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: 「雑誌略号ID」部分のHTML構造の誤りにより要素番号をずらす必要がある。
  offset = 1
  result[:title] = clip_text(tbody, 2, 2 + offset)  # 雑誌名
  result[:title_kana] = clip_text(tbody, 2, 3 + offset) # 雑誌名 ヨミ
  result[:publisher] = clip_text(tbody, 2, 4 + offset)  # 出版者名
  result[:published_area] = clip_text(tbody, 2, 6 + offset) # 出版地
  result[:published_interval] = clip_text(tbody, 2, 7 + offset) # 発行頻度
  result[:history] = clip_text(tbody, 2, 8 + offset)  # 変遷
  result[:introduction] = clip_text(tbody, 2, 9 + offset) # 紹介文
  result[:published_start_date] = clip_text(tbody, 2, 10 + offset) # 創刊年月日
  result[:published_end_date] = clip_text(tbody, 2, 11 + offset) # 終刊年月日
  result[:display_last_volume] = clip_text(tbody, 2, 12 + offset)  # 終刊表示号数
  result[:last_volume] = clip_text(tbody, 2, 13 + offset) # 終刊号
  result[:volume] = clip_text(tbody, 4, 13 + offset)  # 号
  result[:volume2] = clip_text(tbody, 6, 13 + offset) # 巻号
  # NOTE: 「ISSN」部分のHTML構造の誤りにより要素番号をずらす必要がある。
  # result[:issn] = clip_text(tbody, 2, 14 + offset) # ISSN
  offset = 2
  result[:japan_book_number] = clip_text(tbody, 2, 15 + offset)  # 全国書誌番号
  result[:osaka_title_code] = clip_text(tbody, 2, 16 + offset) # 大阪タイトルコード
  result[:langage] = clip_text(tbody, 2, 17 + offset)  # 言語区分
  result[:tags] = clip_text(tbody, 2, 18 + offset) # タグ
  result[:note] = clip_text(tbody, 2, 19 + offset) # 備考

  result[:magazines] = [] # 雑誌巻号
  doc.css('body > article > div.sub > section > div.moreBlock table.infoTbl2 tbody tr').each do |tr|
    next if tr.css('td').empty?
    content = {}
    content[:title] = clip_text(tr, 1)
    content[:published_date] = clip_text(tr, 2)
    content[:display_volume] = clip_text(tr, 3)
    content[:display_sub_volume] = clip_text(tr, 4)
    result[:magazines] << Magazine.new(clip_id(tr, 1), content)
  end

  result
end
parse_magazine_work(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 324
def parse_magazine_work(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  tbody = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: HTML構造の誤りにより要素番号をずらす必要がある。
  offset = 1
  result[:title] = clip_text(tbody, 2, 2 + offset)  # 雑誌作品名
  result[:title_kana] = clip_text(tbody, 2, 3 + offset) # 雑誌作品名 ヨミ
  result[:author] = clip_text(tbody, 2, 4 + offset) # 作者・著者
  result[:author_kana] = clip_text(tbody, 2, 5 + offset)  # 作者・著者 ヨミ
  result[:original] = clip_text(tbody, 2, 6 + offset) # 原作・原案
  result[:original_kana] = clip_text(tbody, 2, 7 + offset)  # 原作・原案 ヨミ
  result[:collaborator] = clip_text(tbody, 2, 8 + offset) # 協力者
  result[:collaborator_kana] = clip_text(tbody, 2, 9 + offset)  # 協力者 ヨミ
  result[:tags] = clip_text(tbody, 2, 10 + offset) # タグ
  result[:note] = clip_text(tbody, 2, 11 + offset) # 備考

  result[:magazines] = [] # 雑誌巻号
  doc.css('body > article > div.sub > section > div.moreBlock table.infoTbl2 tbody tr').each do |tr|
    next if tr.css('td').empty?
    content = {}
    content[:title] = clip_text(tr, 1)
    content[:published_date] = clip_text(tr, 2)
    content[:display_volume] = clip_text(tr, 3)
    content[:display_sub_volume] = clip_text(tr, 4)
    result[:magazines] << Magazine.new(clip_id(tr, 1), content)
  end

  result
end
parse_material(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 507
def parse_material(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  tbody = doc.css('body > article > div.main > section > table > tbody')
  result[:title] = clip_text(tbody, 2, 2) # 資料名
  result[:title_kana] = clip_text(tbody, 2, 3) # 資料名ヨミ
  result[:discription] = clip_text(tbody, 2, 4)  # 詳細説明(説明文)
  result[:number] = clip_text(tbody, 2, 5)  # 順序
  result[:number_sort] = clip_text(tbody, 4, 5)  # 順序ソート
  result[:related_material_title] = clip_text(tbody, 2, 6) # 関連物(連携収蔵物)
  result[:cateogry] = clip_text(tbody, 2, 7) # 分類・カテゴリー
  result[:authority] = clip_text(tbody, 2, 8) # 責任表示
  result[:authority_kana] = clip_text(tbody, 2, 9) # 責任表示ヨミ
  result[:published_date] = clip_text(tbody, 2, 11) # 時期
  result[:created_date] = clip_text(tbody, 2, 12) # 作成(発行)地
  result[:quantity] = clip_text(tbody, 2, 13) # 数量
  result[:size] = clip_text(tbody, 2, 14) # 大きさ
  result[:condition] = clip_text(tbody, 2, 15) # 状態
  result[:langage] = clip_text(tbody, 2, 16) # 言語区分
  result[:code] = clip_text(tbody, 2, 17) # コード
  result[:tags] = clip_text(tbody, 2, 18) # タグ

  result
end
parse_original_picture(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 532
def parse_original_picture(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  tbody = doc.css('body > article > div.main > section > table > tbody')
  result[:title] = clip_text(tbody, 2, 2) # 原画名
  result[:title_kana] = clip_text(tbody, 2, 3) # 原画ヨミ
  result[:volume] = clip_text(tbody, 2, 4)  # 順序
  result[:volume_sort_number] = clip_text(tbody, 4, 4)  # 順序ソート
  result[:total_page] = clip_text(tbody, 2, 5)  # 枚数
  result[:discription] = clip_text(tbody, 2, 6)  # 詳細説明(紹介文)
  result[:first_magazine_published] = clip_text(tbody, 2, 7) # 雑誌初出
  result[:recording] = clip_text(tbody, 2, 8) # 収録
  result[:authority] = clip_text(tbody, 2, 9) # 作画者・共著者
  result[:authority_kana] = clip_text(tbody, 2, 10) # 作画者・共著者ヨミ
  result[:authors] = clip_authors(tbody, 2, 11) # 著者典拠ID
  result[:created_date] = clip_text(tbody, 2, 12) # 執筆時期
  result[:size] = clip_text(tbody, 2, 13) # 大きさ
  result[:color] = clip_text(tbody, 2, 14) # 色数
  result[:painting_material] = clip_text(tbody, 2, 15) # 状態
  result[:condition] = clip_text(tbody, 2, 16) # 状態
  result[:langage] = clip_text(tbody, 2, 17) # 言語区分
  result[:tags] = clip_text(tbody, 2, 18) # タグ
  result[:comic_work] = ComicWork.new(clip_id(tbody, 2, 18)) # 作品ID

  result
end
parse_search_author(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 50
def parse_search_author(response_body)
  contents = []
  doc = Nokogiri::HTML.parse(response_body)
  doc.css('div.resultTabC table > tbody > tr').each do |tr|
    # NOTE:
    # 検索結果に著者名の行と雑誌掲載作品名の行があるが、
    # 検索したいのは著者名なので雑誌掲載作品名の行は無視する
    next unless has_id?(tr, 1)

    content = {}
    content[:name] = clip_text(tr, 1)
    content[:name_kana] = clip_text(tr, 2)
    content[:related_authors] = clip_authors(tr, 3)
    content[:comic_work_quantity] = clip_text(tr, 4)
    # content[:magazine_works_name] = clip_text(tr, 5)

    contents << Author.new(clip_id(tr, 1), content)
  end
  contents
end
parse_search_magazine(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 33
def parse_search_magazine(response_body)
  contents = []
  doc = Nokogiri::HTML.parse(response_body)
  doc.css('div.resultTabB table > tbody > tr').each do |tr|
    content = {}
    content[:title] = clip_text(tr, 1)
    content[:publisher] = clip_text(tr, 2)
    content[:published_interval] = clip_text(tr, 3)
    content[:published_start_date] = clip_text(tr, 4)
    content[:published_end_date] = clip_text(tr, 5)
    content[:tags] = clip_text(tr, 6)

    contents << MagazineTitle.new(clip_id(tr, 1), content)
  end
  contents
end
parse_search_target_booklet(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 142
def parse_search_target_booklet(response_body)
  contents = []
  doc = Nokogiri::HTML.parse(response_body)
  doc.css('div.resultTabD_subE > div > table > tbody > tr').each do |tr|
    content = {}
    content[:title] = clip_text(tr, 2) # 冊子名
    content[:series] = clip_text(tr, 3) # シリーズ
    content[:volume] = clip_text(tr, 4) # 巻
    content[:author] = clip_text(tr, 5) # 著者名
    content[:publisher] = clip_text(tr, 6) # 出版者・サークル名
    content[:published_date] = clip_text(tr, 7) # 発行年月

    contents << Booklet.new(clip_id(tr, 2), content)
  end
  contents
end
parse_search_target_comic(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 71
def parse_search_target_comic(response_body)
  contents = []
  doc = Nokogiri::HTML.parse(response_body)
  doc.css('div.resultTabD_subA > div > table > tbody > tr').each do |tr|
    content = {}
    content[:isbn10] = clip_isbn10(tr, 1) # ISBN10
    content[:isbn13] = clip_isbn13(tr, 1) # ISBN13
    content[:title] = clip_text(tr, 2) # 単行本名
    content[:label] = clip_text(tr, 3) # 単行本レーベル
    content[:volume] = clip_text(tr, 4) # 巻
    content[:author] = clip_text(tr, 5) # 著者名
    content[:publisher] = clip_text(tr, 6) # 出版者
    content[:published_date] = clip_text(tr, 7) # 発行年月

    contents << Comic.new(clip_id(tr, 2), content)
  end
  contents
end
parse_search_target_magazine(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 90
def parse_search_target_magazine(response_body)
  contents = []
  doc = Nokogiri::HTML.parse(response_body)
  doc.css('div.resultTabD_subB > div > table > tbody > tr').each do |tr|
    content = {}
    content[:title] = clip_text(tr, 2) # 雑誌名
    content[:volume] = clip_text(tr, 3) # 巻・合・通巻
    content[:display_volume] = clip_text(tr, 4) # 表示号数
    content[:display_sub_volume] = clip_text(tr, 5) # 補助号数
    content[:publisher] = clip_text(tr, 6) # 出版者
    content[:published_date] = clip_text(tr, 7) # 表示年月

    contents << Magazine.new(clip_id(tr, 2), content)
  end
  contents
end
parse_search_target_material(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 107
def parse_search_target_material(response_body)
  contents = []
  doc = Nokogiri::HTML.parse(response_body)
  doc.css('div.resultTabD_subC > div > table > tbody > tr').each do |tr|
    content = {}
    content[:title] = clip_text(tr, 2) # 資料名
    content[:category] = clip_text(tr, 3) # 分類・カテゴリー
    content[:number] = clip_text(tr, 4) # 順序
    content[:author] = clip_text(tr, 5) # 著者名
    content[:related_material_title] = clip_text(tr, 6) # 関連物
    content[:published_date] = clip_text(tr, 7) # 時期

    contents << Material.new(clip_id(tr, 2), content)
  end
  contents
end
parse_search_target_original_picture(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 124
def parse_search_target_original_picture(response_body)
  contents = []
  doc = Nokogiri::HTML.parse(response_body)
  doc.css('div.resultTabD_subD > div > table > tbody > tr').each do |tr|
    content = {}
    content[:title] = clip_text(tr, 2) # 原画作品名
    content[:recorded] = clip_text(tr, 3) # 収録
    content[:number] = clip_text(tr, 4) # 順序
    content[:quantity] = clip_text(tr, 5) # 枚数
    content[:author] = clip_text(tr, 6) # 著者名
    content[:published_date] = clip_text(tr, 7) # 初出
    content[:writing_time] = clip_text(tr, 8) # 執筆期間

    contents << OriginalPicture.new(clip_id(tr, 2), content)
  end
  contents
end
parse_search_title(response_body) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 7
def parse_search_title(response_body)
  contents = []
  doc = Nokogiri::HTML.parse(response_body)
  doc.css('div.resultTabA table > tbody > tr').each do |tr|
    content = {}
    content[:title] = clip_text(tr, 1)  # 作品名
    content[:author] = clip_text(tr, 2) # 著者名
    content[:tags] = clip_text(tr, 3) # タグ
    content[:comic_title_quantity] = clip_text(tr, 4) # 単行本全巻
    content[:magazine_work_quantity] = clip_text(tr, 5)  # 雑誌掲載作品
    content[:material_quantity] = clip_text(tr, 6)  # 資料 クラス化非対応
    content[:original_picture_quantity] = clip_text(tr, 7) # 原画 クラス化非対応
    content[:booklet_quantity] = clip_text(tr, 8)  # その他 クラス化非対応

    # リンクがcomic_worksとmagazine_worksの場合がある
    # TODO: link_urlが正しいか確認する
    case clip_uri(tr, 1)
      when /comic_works/
        contents << ComicWork.new(clip_id(tr, 1), content)
      when /magazine_works/
        contents << MagazineWork.new(clip_id(tr, 1), content)
    end
  end
  contents
end

Private Class Methods

clip_authors(node, td_number = nil, tr_number = nil) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 656
def clip_authors(node, td_number = nil, tr_number = nil)
  authors = []
  if td_number && tr_number
    node = node.css("tr:nth-child(#{tr_number}) > td:nth-child(#{td_number}) > a")
  elsif td_number
    node = node.css("td:nth-child(#{td_number}) > a")
  end
  node.each do |a|
    # content = { :name => clip_text(a) } # textがnameではない場合もあるのでコメントアウト
    content = {}
    authors << Author.new(clip_id(a), content)
  end
  authors
rescue
  []
end
clip_id(node, td_number = nil, tr_number = nil) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 639
def clip_id(node, td_number = nil, tr_number = nil)
  uri = clip_uri(node, td_number, tr_number)
  # urlにqueryパラメータがある場合、?以降をを取り除く
  uri = uri[0...uri.index(/\?/)] if uri.include?('?')
  uri.slice(/[0-9]+$/)
rescue
  nil
end
clip_isbn10(node, td_number = nil, tr_number = nil) click to toggle source

NOTE: ISBN10は旧フォーマット。10桁の数字(最後は「X」の場合もある) ISBN13は現行フォーマット。13桁の数字

# File lib/media_arts_db/comic/parse.rb, line 677
def clip_isbn10(node, td_number = nil, tr_number = nil)
  text = clip_text(node, td_number, tr_number)
  text.match(/\b[0-9X]{10}\b/).to_s
rescue
  nil
end
clip_isbn13(node, td_number = nil, tr_number = nil) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 684
def clip_isbn13(node, td_number = nil, tr_number = nil)
  text = clip_text(node, td_number, tr_number)
  text.match(/\b[0-9]{13}\b/).to_s
rescue
  nil
end
clip_text(node, td_number = nil, tr_number = nil) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 613
def clip_text(node, td_number = nil, tr_number = nil)
  if td_number && tr_number
    node = node.css("tr:nth-child(#{tr_number}) > td:nth-child(#{td_number})")
  elsif td_number
    node = node.css("td:nth-child(#{td_number})")
  end
  text = node.css('a').empty? ? node.text : node.css('a').text
  # 改行を取り除き、半角スペースを削除、全角スペースを削除
  text.gsub!(/\n/, '')
  text.strip!
  text.gsub!(/^[ \s]*(.*?)[ \s]*$/, '\1')
rescue
  nil
end
clip_uri(node, td_number = nil, tr_number = nil) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 628
def clip_uri(node, td_number = nil, tr_number = nil)
  if td_number && tr_number
    node = node.css("tr:nth-child(#{tr_number}) > td:nth-child(#{td_number}) > a")
  elsif td_number
    node = node.css("td:nth-child(#{td_number}) > a")
  end
  node.attribute('href').value
rescue
  nil
end
has_id?(node, td_number = nil, tr_number = nil) click to toggle source
# File lib/media_arts_db/comic/parse.rb, line 648
def has_id?(node, td_number = nil, tr_number = nil)
  if clip_id(node, td_number, tr_number)
    true
  else
    false
  end
end