class Aozora2Html

青空文庫形式のテキストファイルを html に整形する ruby スクリプト 変換器本体

complex ruby markup if css3 is major supported, please fix ruby position with property “ruby-position” see also: www.w3.org/TR/2001/WD-css3-ruby-20010216/

Constants

ACCENT_BEGIN
ACCENT_END
ACCENT_TABLE
AOZORABUNKO
CAPTION_COMMAND
CHUUKI_COMMAND
CLOSE_MARK
COMMAND_BEGIN
COMMAND_END
COMMAND_TABLE
class, tag
DAKUTEN
DAKUTEN_KATAKANA_TABLE
DOGYO_KOMIDASHI_COMMAND
DOGYO_MARK
DOGYO_NAKAMIDASHI_COMMAND
DOGYO_OMIDASHI_COMMAND
DYNAMIC_CONTENTS
END_MARK
GAIJI_MARK
IGETA_MARK
INDENT_TYPE
JIAGE_COMMAND
JIS2UCS
JISAGE_COMMAND
KAERITEN_COMMAND
KEIGAKOMI_COMMAND
KOMIDASHI_COMMAND
KU

全角バックスラッシュが出せないから直打ち

KUNTEN_OKURIGANA_COMMAND
LEFT_MARK
MADE_MARK
MADO_KOMIDASHI_COMMAND
MADO_MARK
MADO_NAKAMIDASHI_COMMAND
MADO_OMIDASHI_COMMAND
MAIN_MARK
MIDASHI_COMMAND
NAKAMIDASHI_COMMAND
NOJI
NON_0213_GAIJI
OMIDASHI_COMMAND
ONELINE_COMMAND
OPEN_MARK
ORIKAESHI_COMMAND
OVER_MARK
PAREN_BEGIN_MARK
PAREN_END_MARK
PAT_BOUKI
PAT_CHARSIZE
PAT_CHITSUKI
PAT_CHUUKI
PAT_DIRECTION
PAT_EDITOR

PAT_EDITOR = /[校訂|編|編集|編集校訂|校訂編集]$/

PAT_FRONTREF
PAT_GAIJI
PAT_HENYAKU
PAT_IMAGE
PAT_INLINE_RUBY
PAT_JI_LEN
PAT_KAERITEN
PAT_KUTEN
PAT_KUTEN_DUAL
PAT_OKURIGANA
PAT_ORIKAESHI_JISAGE
PAT_ORIKAESHI_JISAGE2
PAT_REF
PAT_REMOVE_OKURIGANA
PAT_REST_NOTES
PAT_RUBY
PAT_RUBY_DIR
PAT_TRANSLATOR
PHOTO_COMMAND
REGEX_HANKAKU
REGEX_HIRAGANA
REGEX_KANJI
REGEX_KATAKANA
REGEX_ZENKAKU
RUBY_BEGIN_MARK
RUBY_END_MARK
RUBY_PREFIX
SEN_MARK
SIZE_LARGE
SIZE_MIDDLE
SIZE_SMALL
TCY_COMMAND
TEIHON_MARK
TENTSUKI_COMMAND
TEN_MARK
UNDER_MARK
VERSION
WARICHU_COMMAND
WARIGAKI_COMMAND
YOKOGUMI_COMMAND

Public Class Methods

new(input, output, gaiji_dir: nil, css_files: nil) click to toggle source
# File lib/aozora2html.rb, line 148
def initialize(input, output, gaiji_dir: nil, css_files: nil)
  @stream = if input.respond_to?(:read) ## readable IO?
              Jstream.new(input)
            else
              Jstream.new(File.open(input, 'rb:Shift_JIS'))
            end
  @out = if output.respond_to?(:print) ## writable IO?
           output
         else
           File.open(output, 'w')
         end
  @gaiji_dir = gaiji_dir || '../../../gaiji/'
  @css_files = css_files || Array['../../aozora.css']

  @buffer = TextBuffer.new
  @ruby_buf = RubyBuffer.new
  @section = :head ## 現在処理中のセクション(:head,:head_end,:chuuki,:chuuki_in,:body,:tail)
  @header = Aozora2Html::Header.new(css_files: @css_files) ## ヘッダ行の配列
  @style_stack = StyleStack.new ## スタイルのスタック
  @chuuki_table = {} ## 最後にどの注記を出すかを保持しておく
  @images = [] ## 使用した外字の画像保持用
  @indent_stack = [] ## 基本はシンボルだが、ぶらさげのときはdivタグの文字列が入る
  @tag_stack = []
  @midashi_id = 0  ## 見出しのカウンタ、見出しの種類によって増分が異なる
  @terprip = true  ## 改行制御用 (terpriはLisp由来?)
  @endchar = :eof  ## 解析終了文字、AccentParserやTagParserでは異なる
  @noprint = nil ## 行末を読み込んだとき、何も出力しないかどうかのフラグ
end

Public Instance Methods

block_allowed_context?() click to toggle source
# File lib/aozora2html.rb, line 181
def block_allowed_context?
  # inline_tagが開いていないかチェックすれば十分
  @style_stack.empty?
end
detect_command_mode(command) click to toggle source

コマンド文字列からモードのシンボルを取り出す

@return [Symbol]

# File lib/aozora2html.rb, line 244
def detect_command_mode(command)
  if command.match?(INDENT_TYPE[:chitsuki] + END_MARK) || command.match?(JIAGE_COMMAND + END_MARK)
    return :chitsuki
  end

  INDENT_TYPE.each_key do |key|
    if command.match?(INDENT_TYPE[key])
      return key
    end
  end
  nil
end
kuten2png(substring) click to toggle source
# File lib/aozora2html.rb, line 226
def kuten2png(substring)
  desc = substring.gsub(PAT_KUTEN, '')
  matched = desc.match(/[12]-\d{1,2}-\d{1,2}/)
  if matched && !desc.match?(NON_0213_GAIJI) && !desc.match?(PAT_KUTEN_DUAL)
    @chuuki_table[:newjis] = true
    codes = matched[0].split('-')
    folder = sprintf('%1d-%02d', codes[0], codes[1])
    code = sprintf('%1d-%02d-%02d', *codes)
    Aozora2Html::Tag::EmbedGaiji.new(self, folder, code, desc.gsub!(IGETA_MARK, ''), gaiji_dir: @gaiji_dir)
  else
    substring
  end
end
line_number() click to toggle source
# File lib/aozora2html.rb, line 177
def line_number
  @stream.line
end
new_midashi_id(size) click to toggle source
# File lib/aozora2html.rb, line 207
def new_midashi_id(size)
  if size.is_a?(Integer)
    @midashi_id += size
    return @midashi_id
  end

  case size
  when /#{SIZE_SMALL}/o
    inc = 1
  when /#{SIZE_MIDDLE}/o
    inc = 10
  when /#{SIZE_LARGE}/o
    inc = 100
  else
    raise Aozora2Html::Error, I18n.t(:undefined_header)
  end
  @midashi_id += inc
end
process() click to toggle source

parseする

終了時(終端まで来た場合)にはthrow :terminateで脱出する

# File lib/aozora2html.rb, line 190
def process
  catch(:terminate) do
    parse
  rescue Aozora2Html::Error => e
    puts e.message(line_number)
    if e.is_a?(Aozora2Html::Error)
      exit(2)
    end
  end
  tail_output # final call
  finalize
  close
rescue StandardError => e
  puts "ERROR: line: #{line_number}"
  raise e
end

Private Instance Methods

apply_burasage(command) click to toggle source
# File lib/aozora2html.rb, line 724
def apply_burasage(command)
  tag = nil
  if implicit_close(:jisage)
    @terprip = false
    general_output
  end
  @noprint = true # always no print
  command = Utils.convert_japanese_number(command)
  if command.match?(TENTSUKI_COMMAND)
    width = command.match(PAT_ORIKAESHI_JISAGE)[1]
    tag = "<div class=\"burasage\" style=\"margin-left: #{width}em; text-indent: -#{width}em;\">"
  else
    matched = command.match(PAT_ORIKAESHI_JISAGE2)
    left, indent = matched.to_a[1, 2]
    left = left.to_i - indent.to_i
    tag = "<div class=\"burasage\" style=\"margin-left: #{indent}em; text-indent: #{left}em;\">"
  end
  @indent_stack.push(tag)
  @tag_stack.push('') # dummy
  nil
end
apply_caption(_command) click to toggle source
# File lib/aozora2html.rb, line 849
def apply_caption(_command)
  @indent_stack.push(:caption)
  Aozora2Html::Tag::MultilineCaption.new(self)
end
apply_chitsuki(string, multiline: false) click to toggle source
# File lib/aozora2html.rb, line 806
def apply_chitsuki(string, multiline: false)
  if string.match?(CLOSE_MARK + INDENT_TYPE[:chitsuki] + END_MARK) ||
     string.match?(CLOSE_MARK + JIAGE_COMMAND + END_MARK)
    explicit_close(:chitsuki)
    @indent_stack.pop
    nil
  else
    len = chitsuki_length(string)
    if multiline
      # 複数行指定
      implicit_close(:chitsuki)
      @indent_stack.push(:chitsuki)
      Aozora2Html::Tag::MultilineChitsuki.new(self, len)
    else
      # 1行のみ
      Aozora2Html::Tag::OnelineChitsuki.new(self, len)
    end
  end
end
apply_dakuten_katakana(command) click to toggle source
# File lib/aozora2html.rb, line 1208
def apply_dakuten_katakana(command)
  n = command.match(/1-7-8([2345])/).to_a[1]
  frontref = DAKUTEN_KATAKANA_TABLE[n]
  found = search_front_reference(frontref)
  if found
    Aozora2Html::Tag::DakutenKatakana.new(self, n, found.join, gaiji_dir: @gaiji_dir)
  else
    apply_rest_notes(command)
  end
end
apply_jisage(command) click to toggle source
# File lib/aozora2html.rb, line 750
def apply_jisage(command)
  if command.match?(MADE_MARK) || command.match?(END_MARK)
    # 字下げ終わり
    explicit_close(:jisage)
    @indent_stack.pop
    nil
  elsif command.match?(ONELINE_COMMAND)
    # 1行だけ
    @buffer.unshift(Aozora2Html::Tag::OnelineJisage.new(self, jisage_width(command)))
    nil
  elsif (@buffer.length == 0) && (@stream.peek_char(0) == "\r\n")
    # commandのみ
    @terprip = false
    implicit_close(:jisage)
    # adhook hack
    @noprint = false
    @indent_stack.push(:jisage)
    Aozora2Html::Tag::MultilineJisage.new(self, jisage_width(command))
  else # rubocop:disable Lint/DuplicateBranch
    @buffer.unshift(Aozora2Html::Tag::OnelineJisage.new(self, jisage_width(command)))
    nil
  end
end
apply_jizume(command) click to toggle source
# File lib/aozora2html.rb, line 854
def apply_jizume(command)
  w = Utils.convert_japanese_number(command).match(/(\d*)(?:#{INDENT_TYPE[:jizume]})/)[1]
  @indent_stack.push(:jizume)
  Aozora2Html::Tag::Jizume.new(self, w)
end
apply_keigakomi(_command) click to toggle source
# File lib/aozora2html.rb, line 844
def apply_keigakomi(_command)
  @indent_stack.push(:keigakomi)
  Aozora2Html::Tag::Keigakomi.new(self)
end
apply_midashi(command) click to toggle source
# File lib/aozora2html.rb, line 826
def apply_midashi(command)
  @indent_stack.push(:midashi)
  if command.match?(DOGYO_MARK)
    midashi_type = :dogyo
  elsif command.match?(MADO_MARK)
    midashi_type = :mado
  else
    midashi_type = :normal
    @terprip = false
  end
  Aozora2Html::Tag::MultilineMidashi.new(self, command, midashi_type)
end
apply_rest_notes(command) click to toggle source
# File lib/aozora2html.rb, line 1234
def apply_rest_notes(command)
  @chuuki_table[:chuki] = true
  Aozora2Html::Tag::EditorNote.new(self, command)
end
apply_ruby() click to toggle source

|が来たときは文字種を無視してruby_bufを守らなきゃいけない

# File lib/aozora2html.rb, line 1240
def apply_ruby
  @ruby_buf.protected = nil
  ruby, _raw = read_to_nest(RUBY_END_MARK)
  if ruby.length == 0
    # escaped ruby character
    return RUBY_BEGIN_MARK + RUBY_END_MARK
  end

  @buffer.concat(@ruby_buf.create_ruby(self, ruby))

  nil
end
apply_warichu(command) click to toggle source
# File lib/aozora2html.rb, line 774
def apply_warichu(command)
  if command.match?(END_MARK)
    if @stream.peek_char(0) != PAREN_END_MARK
      push_char(PAREN_END_MARK)
    end
    push_char('</span>')
  else
    check = @ruby_buf.last

    # NOTE: Do not remove duplicates!
    # rubocop:disable Style/IdenticalConditionalBranches
    if check.is_a?(String) && check.end_with?(PAREN_BEGIN_MARK)
      push_char('<span class="warichu">')
    else
      push_char('<span class="warichu">')
      push_char(PAREN_BEGIN_MARK)
    end
    # rubocop:enable Style/IdenticalConditionalBranches
  end
  nil
end
apply_yokogumi(_command) click to toggle source
# File lib/aozora2html.rb, line 839
def apply_yokogumi(_command)
  @indent_stack.push(:yokogumi)
  Aozora2Html::Tag::MultilineYokogumi.new(self)
end
assign_kunoji() click to toggle source

くの字点の処理

くの字点は現状そのまま出力するのでフッタの「表記について」で出力するかどうかのフラグ処理だけ行う

# File lib/aozora2html.rb, line 1222
def assign_kunoji
  second = @stream.peek_char(0)
  case second
  when NOJI
    @chuuki_table[:kunoji] = true
  when DAKUTEN
    if @stream.peek_char(1) == NOJI
      @chuuki_table[:dakutenkunoji] = true
    end
  end
end
check_close_match(type) click to toggle source
# File lib/aozora2html.rb, line 298
def check_close_match(type)
  ind = if @indent_stack.last.is_a?(String)
          @noprint = true
          :jisage
        else
          @indent_stack.last
        end
  if ind == type
    nil
  else
    convert_indent_type(type)
  end
end
chitsuki_length(command) click to toggle source
# File lib/aozora2html.rb, line 796
def chitsuki_length(command)
  command = Utils.convert_japanese_number(command)
  matched = command.match(PAT_JI_LEN)
  if matched
    matched[1]
  else
    '0'
  end
end
close() click to toggle source
# File lib/aozora2html.rb, line 287
def close
  @stream.close
  @out.close
end
convert_indent_type(type) click to toggle source

記法のシンボル名から文字列へ変換する シンボルが見つからなければそのまま返す

# File lib/aozora2html.rb, line 294
def convert_indent_type(type)
  INDENT_TYPE[type] || type
end
detect_style_size(style) click to toggle source
# File lib/aozora2html.rb, line 865
def detect_style_size(style)
  if style.match?('小'.to_sjis)
    :sho
  else
    :dai
  end
end
dispatch_aozora_command() click to toggle source

注記記法の場合分け

# File lib/aozora2html.rb, line 677
def dispatch_aozora_command
  # 「[」の次が「#」でなければ注記ではない
  if @stream.peek_char(0) != IGETA_MARK
    return COMMAND_BEGIN
  end

  # 「#」を読み捨てる
  _ = read_char
  command, raw = read_to_nest(COMMAND_END)
  # 適用順序はこれで大丈夫か? 誤爆怖いよ誤爆
  if command.match?(ORIKAESHI_COMMAND)
    apply_burasage(command)

  elsif command.start_with?(OPEN_MARK)
    exec_block_start_command(command)
  elsif command.start_with?(CLOSE_MARK)
    exec_block_end_command(command)

  elsif command.match?(WARICHU_COMMAND)
    apply_warichu(command)
  elsif command.match?(JISAGE_COMMAND)
    apply_jisage(command)
  elsif command.match?(/fig(\d)+_(\d)+\.png/)
    exec_img_command(command, raw)
  # avoid to try complex ruby -- escape to notes
  elsif command.match?(PAT_REST_NOTES)
    apply_rest_notes(command)
  elsif command.end_with?(END_MARK)
    exec_inline_end_command(command)
    nil
  elsif command.match?(PAT_REF)
    exec_frontref_command(command)
  elsif command.match?(/1-7-8[2345]/)
    apply_dakuten_katakana(command)
  elsif command.match?(PAT_KAERITEN)
    Aozora2Html::Tag::Kaeriten.new(self, command)
  elsif command.match?(PAT_OKURIGANA)
    Aozora2Html::Tag::Okurigana.new(self, command.gsub!(PAT_REMOVE_OKURIGANA, ''))
  elsif command.match?(PAT_CHITSUKI)
    apply_chitsuki(command)
  elsif exec_inline_start_command(command)
    nil
  else # rubocop:disable Lint/DuplicateBranch
    apply_rest_notes(command)
  end
end
dispatch_gaiji() click to toggle source
# File lib/aozora2html.rb, line 651
def dispatch_gaiji
  # 「※」の次が「[」でなければ外字ではない
  if @stream.peek_char(0) != COMMAND_BEGIN
    return GAIJI_MARK
  end

  # 「[」を読み捨てる
  _ = read_char
  # embed?
  command, _raw = read_to_nest(COMMAND_END)
  try_emb = kuten2png(command)
  if try_emb != command
    return try_emb
  end

  matched = command.match(/U\+([0-9A-F]{4,5})/)
  if matched && Aozora2Html::Tag::EmbedGaiji.use_unicode
    unicode_num = matched[1]
    Aozora2Html::Tag::EmbedGaiji.new(self, nil, nil, command, unicode_num, gaiji_dir: @gaiji_dir)
  else
    # Unemb
    escape_gaiji(command)
  end
end
dynamic_contents() click to toggle source
# File lib/aozora2html.rb, line 283
def dynamic_contents
  @out.print DYNAMIC_CONTENTS
end
ending_check() click to toggle source

本文が終了したかどうかチェックする

# File lib/aozora2html.rb, line 464
def ending_check
  # `底本:`でフッタ(:tail)に遷移
  return unless @stream.peek_char(0) == TEIHON_MARK[1] && @stream.peek_char(1) == TEIHON_MARK[2]

  @section = :tail
  ensure_close
  @out.print "</div>\r\n<div class=\"bibliographical_information\">\r\n<hr />\r\n<br />\r\n"
end
ensure_close() click to toggle source

本文が終わってよいかチェックし、終わっていなければ例外をあげる

# File lib/aozora2html.rb, line 328
def ensure_close
  n = @indent_stack.last
  return unless n

  raise Aozora2Html::Error, I18n.t(:terminate_in_style, convert_indent_type(n))
end
escape_gaiji(command) click to toggle source
# File lib/aozora2html.rb, line 640
def escape_gaiji(command)
  _whole, kanji, line = command.match(PAT_GAIJI).to_a
  tmp = @images.assoc(kanji)
  if tmp
    tmp.push(line)
  else
    @images.push([kanji, line])
  end
  Aozora2Html::Tag::UnEmbedGaiji.new(self, command)
end
escape_special_chars(char) click to toggle source

Original Aozora2Html#push_chars does not convert “‘” into ’&#39;‘; it’s old behaivor of CGI.escapeHTML().

# File lib/aozora2html.rb, line 1354
def escape_special_chars(char)
  if char.is_a?(String)
    char.gsub(/[&"<>]/, { '&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;' })
  else
    char
  end
end
exec_block_end_command(command) click to toggle source
# File lib/aozora2html.rb, line 1064
def exec_block_end_command(command)
  original_command = command.dup
  command.sub!(/^#{CLOSE_MARK}/o, '')
  matched = false
  mode = detect_command_mode(command)
  if mode
    explicit_close(mode)
    matched = @indent_stack.pop
  end

  if matched
    unless matched.is_a?(String)
      @terprip = false
    end
    nil
  else
    apply_rest_notes(original_command)
  end
end
exec_block_start_command(command) click to toggle source
# File lib/aozora2html.rb, line 986
def exec_block_start_command(command)
  original_command = command.dup
  command.sub!(/^#{OPEN_MARK}/o, '')
  match_buf = +''
  if command.match?(INDENT_TYPE[:jisage])
    push_block_tag(apply_jisage(command), match_buf)
  elsif command.match?(/(#{INDENT_TYPE[:chitsuki]}|#{JIAGE_COMMAND})$/)
    push_block_tag(apply_chitsuki(command, multiline: true), match_buf)
  end

  if command.match?(INDENT_TYPE[:midashi])
    push_block_tag(apply_midashi(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:jizume])
    if match_buf != ''
      @indent_stack.pop
    end
    push_block_tag(apply_jizume(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:yokogumi])
    if match_buf != ''
      @indent_stack.pop
    end
    push_block_tag(apply_yokogumi(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:keigakomi])
    if match_buf != ''
      @indent_stack.pop
    end
    push_block_tag(apply_keigakomi(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:caption])
    if match_buf != ''
      @indent_stack.pop
    end
    push_block_tag(apply_caption(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:futoji])
    if match_buf != ''
      @indent_stack.pop
    end
    push_block_tag(Aozora2Html::Tag::MultilineStyle.new(self, 'futoji'), match_buf)
    @indent_stack.push(:futoji)
  end
  if command.match?(INDENT_TYPE[:shatai])
    if match_buf != ''
      @indent_stack.pop
    end
    push_block_tag(Aozora2Html::Tag::MultilineStyle.new(self, 'shatai'), match_buf)
    @indent_stack.push(:shatai)
  end

  if command.match?(PAT_CHARSIZE)
    _whole, nest, style = command.match(PAT_CHARSIZE).to_a
    if match_buf != ''
      @indent_stack.pop
    end
    daisho = detect_style_size(style)
    push_block_tag(Aozora2Html::Tag::FontSize.new(self,
                                                  Utils.convert_japanese_number(nest).to_i,
                                                  daisho),
                   match_buf)
    @indent_stack.push(daisho)
  end

  if match_buf == ''
    apply_rest_notes(original_command)
  else
    @tag_stack.push(match_buf)
    nil
  end
end
exec_frontref_command(command) click to toggle source
# File lib/aozora2html.rb, line 1099
def exec_frontref_command(command)
  _whole, reference, spec1, spec2 = command.match(PAT_FRONTREF).to_a
  spec = if spec1
           spec1 + spec2
         else
           spec2
         end
  if reference
    found = search_front_reference(reference)
    if found
      tmp = exec_style(found, spec)
      return tmp if tmp

      recovery_front_reference(found)
    end
  end
  # comment out?
  apply_rest_notes(command)
end
exec_img_command(command, raw) click to toggle source
# File lib/aozora2html.rb, line 1084
def exec_img_command(command, raw)
  matched = raw.match(PAT_IMAGE)
  if matched
    _whole, alt, src, _wh, width, height = matched.to_a
    css_class = if alt.match?(PHOTO_COMMAND)
                  'photo'
                else
                  'illustration'
                end
    Aozora2Html::Tag::Img.new(self, src, css_class, alt, width, height)
  else
    apply_rest_notes(command)
  end
end
exec_inline_end_command(command) click to toggle source
# File lib/aozora2html.rb, line 966
def exec_inline_end_command(command)
  encount = command.sub(END_MARK, '')
  if encount == MAIN_MARK
    # force to finish main_text
    @section = :tail
    ensure_close
    @noprint = true
    @out.print "</div>\r\n<div class=\"after_text\">\r\n<hr />\r\n"
  elsif encount.match?(CHUUKI_COMMAND) && (@style_stack.last_command == CHUUKI_COMMAND)
    # special inline ruby
    @style_stack.pop
    _whole, ruby = encount.match(PAT_INLINE_RUBY).to_a
    push_char('</rb><rp>(</rp><rt>'.to_sjis + ruby + '</rt><rp>)</rp></ruby>'.to_sjis)
  elsif @style_stack.last_command.match?(encount)
    push_char(@style_stack.pop[1])
  else
    raise Aozora2Html::Error, I18n.t(:invalid_nesting, encount, @style_stack.last_command)
  end
end
exec_inline_start_command(command) click to toggle source
# File lib/aozora2html.rb, line 873
def exec_inline_start_command(command)
  case command
  when CHUUKI_COMMAND
    @style_stack.push([command, '</ruby>'])
    push_char('<ruby><rb>')
  when TCY_COMMAND
    @style_stack.push([command, '</span>'])
    push_char('<span dir="ltr">')
  when KEIGAKOMI_COMMAND
    @style_stack.push([command, '</span>'])
    push_char('<span class="keigakomi">')
  when YOKOGUMI_COMMAND
    @style_stack.push([command, '</span>'])
    push_char('<span class="yokogumi">')
  when CAPTION_COMMAND
    @style_stack.push([command, '</span>'])
    push_char('<span class="caption">')
  when WARIGAKI_COMMAND
    @style_stack.push([command, '</span>'])
    push_char('<span class="warigaki">')
  when OMIDASHI_COMMAND
    @style_stack.push([command, '</a></h3>'])
    @terprip = false
    push_char("<h3 class=\"o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
  when NAKAMIDASHI_COMMAND
    @style_stack.push([command, '</a></h4>'])
    @terprip = false
    push_char("<h4 class=\"naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
  when KOMIDASHI_COMMAND
    @style_stack.push([command, '</a></h5>'])
    @terprip = false
    push_char("<h5 class=\"ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
  when DOGYO_OMIDASHI_COMMAND
    @style_stack.push([command, '</a></h3>'])
    push_char("<h3 class=\"dogyo-o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
  when DOGYO_NAKAMIDASHI_COMMAND
    @style_stack.push([command, '</a></h4>'])
    push_char("<h4 class=\"dogyo-naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
  when DOGYO_KOMIDASHI_COMMAND
    @style_stack.push([command, '</a></h5>'])
    push_char("<h5 class=\"dogyo-ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
  when MADO_OMIDASHI_COMMAND
    @style_stack.push([command, '</a></h3>'])
    push_char("<h3 class=\"mado-o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
  when MADO_NAKAMIDASHI_COMMAND
    @style_stack.push([command, '</a></h4>'])
    push_char("<h4 class=\"mado-naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
  when MADO_KOMIDASHI_COMMAND
    @style_stack.push([command, '</a></h5>'])
    push_char("<h5 class=\"mado-ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
  when PAT_CHARSIZE
    @style_stack.push([command, '</span>'])
    _whole, nest, style = command.match(PAT_CHARSIZE).to_a
    times = Utils.convert_japanese_number(nest).to_i
    daisho = detect_style_size(style)
    html_class = daisho.to_s + times.to_s
    size = Utils.create_font_size(times, daisho)
    push_char("<span class=\"#{html_class}\" style=\"font-size: #{size};\">")
  else
    ## Decoration ##
    key = command
    filter = ->(x) { x }
    if command.match?(PAT_DIRECTION)
      _whole, dir, com = command.match(PAT_DIRECTION).to_a
      # renew command
      key = com
      if command.match?(TEN_MARK)
        case dir
        when LEFT_MARK, UNDER_MARK
          filter = ->(x) { "#{x}_after" }
        end
      elsif command.match?(SEN_MARK)
        case dir
        when LEFT_MARK, OVER_MARK
          filter = ->(x) { x.sub('under', 'over') }
        end
      end
    end

    found = COMMAND_TABLE[key]
    # found = [class, tag]
    if found
      @style_stack.push([command, "</#{found[1]}>"])
      push_char("<#{found[1]} class=\"#{filter.call(found[0])}\">")
    else
      if $DEBUG
        puts I18n.t(:warn_undefined_command, line_number, key)
      end
      nil
    end
  end
end
exec_style(targets, command) click to toggle source
# File lib/aozora2html.rb, line 1134
def exec_style(targets, command)
  try_kuten = kuten2png(command)
  if try_kuten != command
    try_kuten
  elsif command.match?(TCY_COMMAND)
    Aozora2Html::Tag::Dir.new(self, targets)
  elsif command.match?(YOKOGUMI_COMMAND)
    Aozora2Html::Tag::InlineYokogumi.new(self, targets)
  elsif command.match?(KEIGAKOMI_COMMAND)
    Aozora2Html::Tag::InlineKeigakomi.new(self, targets)
  elsif command.match?(CAPTION_COMMAND)
    Aozora2Html::Tag::InlineCaption.new(self, targets)
  elsif command.match?(KAERITEN_COMMAND)
    Aozora2Html::Tag::Kaeriten.new(self, targets)
  elsif command.match?(KUNTEN_OKURIGANA_COMMAND)
    Aozora2Html::Tag::Okurigana.new(self, targets)
  elsif command.match?(MIDASHI_COMMAND)
    midashi_type = :normal
    if command.match?(DOGYO_MARK)
      midashi_type = :dogyo
    elsif command.match?(MADO_MARK)
      midashi_type = :mado
    else
      @terprip = false
    end
    Aozora2Html::Tag::Midashi.new(self, targets, command, midashi_type)
  elsif command.match?(PAT_CHARSIZE)
    _whole, nest, style = command.match(PAT_CHARSIZE).to_a
    Aozora2Html::Tag::InlineFontSize.new(self, targets,
                                         Utils.convert_japanese_number(nest).to_i,
                                         detect_style_size(style))
  elsif command.match?(PAT_RUBY_DIR)
    _whole, _dir, under = command.match(PAT_RUBY_DIR).to_a
    if (targets.length == 1) && targets[0].is_a?(Aozora2Html::Tag::Ruby)
      tag = targets[0]
      raise Aozora2Html::Error, I18n.t(:dont_allow_triple_ruby) unless tag.under_ruby == ''

      tag.under_ruby = under
      tag
    else
      rearrange_ruby_tag(targets, '', under)
    end
  elsif command.match?(PAT_CHUUKI)
    rearrange_ruby_tag(targets, PAT_CHUUKI.match(command).to_a[1], '')
  elsif command.match?(PAT_BOUKI)
    rearrange_ruby_tag(targets, multiply(PAT_BOUKI.match(command).to_a[1], targets.to_s.length), '')
  else
    ## direction fix! ##
    filter = ->(x) { x }
    if command.match?(PAT_DIRECTION)
      _whole, dir, com = command.match(PAT_DIRECTION).to_a
      # renew command
      command = com
      if command.match?(TEN_MARK)
        case dir
        when LEFT_MARK, UNDER_MARK
          filter = ->(x) { "#{x}_after" }
        end
      elsif command.match?(SEN_MARK)
        case dir
        when LEFT_MARK, OVER_MARK
          filter = ->(x) { x.sub('under', 'over') }
        end
      end
    end

    found = COMMAND_TABLE[command]
    # found = [class, tag]
    if found
      Aozora2Html::Tag::Decorate.new(self, targets, filter.call(found[0]), found[1])
    end
  end
end
explicit_close(type) click to toggle source
# File lib/aozora2html.rb, line 335
def explicit_close(type)
  n = check_close_match(type)
  if n
    raise Aozora2Html::Error, I18n.t(:invalid_closing, n, n)
  end

  tag = @tag_stack.pop
  return unless tag

  push_chars(tag)
end
finalize() click to toggle source
# File lib/aozora2html.rb, line 277
def finalize
  hyoki
  dynamic_contents
  @out.print("</body>\r\n</html>\r\n")
end
general_output() click to toggle source

読み込んだ行の出力を行う

parserが改行文字を読み込んだら呼ばれる。 最終的に@ruby_bufと@bufferは初期化する

@return [void]

# File lib/aozora2html.rb, line 499
def general_output
  if @style_stack.last
    raise Aozora2Html::Error, I18n.t(:dont_crlf_in_style, @style_stack.last_command)
  end

  # bufferにインデントタグだけがあったら改行しない!
  if @noprint
    @noprint = false
    return
  end
  @ruby_buf.dump_into(@buffer)
  buf = @buffer
  @buffer = TextBuffer.new
  tail = []

  indent_type = buf.blank_type
  terprip = buf.terpri? && @terprip
  @terprip = true

  if @indent_stack.last.is_a?(String) && !indent_type
    @out.print @indent_stack.last
  end

  buf.each do |s|
    if s.is_a?(Aozora2Html::Tag::OnelineIndent)
      tail.unshift(s.close_tag)
    elsif s.is_a?(Aozora2Html::Tag::UnEmbedGaiji) && !s.escaped?
      # 消してあった※を復活させて
      @out.print GAIJI_MARK
    end
    @out.print s.to_s
  end

  # 最後はCRLFを出力する
  if @indent_stack.last.is_a?(String)
    # ぶら下げindent
    # tail always active
    @out.print tail.map(&:to_s).join
    if indent_type == :inline
      @out.print "\r\n"
    elsif indent_type && terprip
      @out.print "<br />\r\n"
    else
      @out.print "</div>\r\n"
    end
  elsif tail.empty? && terprip
    @out.print "<br />\r\n"
  else
    @out.print tail.map(&:to_s).join
    @out.print "\r\n"
  end
end
hyoki() click to toggle source

‘●表記について`で使用した注記等を出力する

# File lib/aozora2html.rb, line 1304
  def hyoki
    # <br /> times fix
    @out.print "<br />\r\n</div>\r\n<div class=\"notation_notes\">\r\n<hr />\r\n<br />\r\n●表記について<br />\r\n<ul>\r\n".to_sjis
    @out.print "\t<li>このファイルは W3C 勧告 XHTML1.1 にそった形式で作成されています。</li>\r\n".to_sjis
    if @chuuki_table[:chuki]
      @out.print "\t<li>[#…]は、入力者による注を表す記号です。</li>\r\n".to_sjis
    end
    if @chuuki_table[:kunoji]
      if @chuuki_table[:dakutenkunoji]
        @out.printf("\t<li>「くの字点」は「%s」で、「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + NOJI, KU + DAKUTEN + NOJI)
      else
        @out.printf("\t<li>「くの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + NOJI)
      end
    elsif @chuuki_table[:dakutenkunoji]
      @out.printf("\t<li>「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + DAKUTEN + NOJI)
    end
    if @chuuki_table[:newjis] && !Aozora2Html::Tag::EmbedGaiji.use_jisx0213
      @out.print "\t<li>「くの字点」をのぞくJIS X 0213にある文字は、画像化して埋め込みました。</li>\r\n".to_sjis
    end
    if @chuuki_table[:accent] && !Aozora2Html::Tag::Accent.use_jisx0213
      @out.print "\t<li>アクセント符号付きラテン文字は、画像化して埋め込みました。</li>\r\n".to_sjis
    end
    if @images[0]
      @out.print "\t<li>この作品には、JIS X 0213にない、以下の文字が用いられています。(数字は、底本中の出現「ページ-行」数。)これらの文字は本文内では「※[#…]」の形で示しました。</li>\r\n</ul>\r\n<br />\r\n\t\t<table class=\"gaiji_list\">\r\n".to_sjis
      @images.each do |cell|
        k, *v = cell
        vs = v.join('、'.to_sjis)
        @out.print "                    <tr>
                                <td>
                                #{k}
                                </td>
                                <td>&nbsp;&nbsp;</td>
                                <td>
#{vs}                           </td>
                                <!--
                                <td>
                                " + '  '.to_sjis + "<img src=\"../../../gaiji/others/xxxx.png\" alt=\"#{k}\" width=32 height=32 />
                                </td>
                                -->
                        </tr>
".to_sjis
      end
      @out.print "\t\t</table>\r\n".to_sjis
    else
      @out.print "</ul>\r\n" # <ul>内に<li>以外のエレメントが来るのは不正なので修正
    end
    @out.print "</div>\r\n"
  end
implicit_close(type) click to toggle source
# File lib/aozora2html.rb, line 312
def implicit_close(type)
  return unless @indent_stack.last

  if check_close_match(type)
    # ok, nested multiline tags, go ahead
  else
    # not nested, please close
    @indent_stack.pop
    tag = @tag_stack.pop
    if tag
      push_chars(tag)
    end
  end
end
jisage_width(command) click to toggle source
# File lib/aozora2html.rb, line 746
def jisage_width(command)
  Utils.convert_japanese_number(command).match(/(\d*)(?:#{JISAGE_COMMAND})/o)[1]
end
judge_chuuki() click to toggle source
# File lib/aozora2html.rb, line 367
def judge_chuuki
  # 注記が入るかどうかチェック
  i = 0
  loop do
    case @stream.peek_char(i)
    when '-'
      i += 1
    when "\r\n"
      @section = if i == 0
                   :body
                 else
                   :chuuki
                 end
      return
    else
      @section = :body
      @out.print("<br />\r\n")
      return
    end
  end
end
multiply(bouki, times) click to toggle source

傍記を並べる用

# File lib/aozora2html.rb, line 1121
def multiply(bouki, times)
  sep = '&nbsp;'
  ([bouki] * times).join(sep)
end
parse() click to toggle source

main loop

# File lib/aozora2html.rb, line 348
def parse
  loop do
    case @section
    when :head
      parse_header
    when :head_end
      judge_chuuki
    when :chuuki, :chuuki_in
      parse_chuuki
    when :body
      parse_body
    when :tail
      parse_tail
    else
      raise Aozora2Html::Error, 'encount undefined condition'
    end
  end
end
parse_body() click to toggle source

本体解析部

1文字ずつ読み込み、dispatchして@buffer,@ruby_bufへしまう 改行コードに当たったら溜め込んだものをgeneral_outputする

# File lib/aozora2html.rb, line 420
def parse_body
  char = read_char
  check = true
  case char
  when ACCENT_BEGIN
    check = false
    char = read_accent
  when TEIHON_MARK[0]
    if @buffer.length == 0
      ending_check
    end
  when GAIJI_MARK
    char = dispatch_gaiji
  when COMMAND_BEGIN
    char = dispatch_aozora_command
  when KU
    assign_kunoji
  when RUBY_BEGIN_MARK
    char = apply_ruby
  end

  case char
  when "\r\n"
    general_output
  when RUBY_PREFIX
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.protected = true
  when @endchar
    # suddenly finished the file
    puts I18n.t(:warn_unexpected_terminator, line_number)
    throw :terminate
  when nil
    # noop
  else
    if check
      Utils.illegal_char_check(char, line_number)
    end
    push_chars(escape_special_chars(char))
  end
end
parse_chuuki() click to toggle source
# File lib/aozora2html.rb, line 403
def parse_chuuki
  string = read_line
  return unless string.match?(/^-+$/)

  case @section
  when :chuuki
    @section = :chuuki_in
  when :chuuki_in
    @section = :body
  end
end
parse_header() click to toggle source

headerは一行ずつ読む

# File lib/aozora2html.rb, line 390
def parse_header
  string = read_line
  # refine from Tomita 09/06/14
  if string == '' # 空行がくれば、そこでヘッダー終了とみなす
    @section = :head_end
    @out.print @header.to_html
  else
    string.gsub!(RUBY_PREFIX, '')
    string.gsub!(PAT_RUBY, '')
    @header.push(string)
  end
end
parse_tail() click to toggle source

parse_bodyのフッタ版

# File lib/aozora2html.rb, line 1254
def parse_tail
  char = read_char
  check = true
  case char
  when ACCENT_BEGIN
    check = false
    char = read_accent
  when @endchar
    throw :terminate
  when GAIJI_MARK
    char = dispatch_gaiji
  when COMMAND_BEGIN
    char = dispatch_aozora_command
  when KU
    assign_kunoji
  when RUBY_BEGIN_MARK
    char = apply_ruby
  end

  case char
  when "\r\n"
    tail_output
  when RUBY_PREFIX
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.protected = true
  when nil
    # noop
  else
    if check
      Utils.illegal_char_check(char, line_number)
    end
    push_chars(escape_special_chars(char))
  end
end
push_block_tag(tag, closing) click to toggle source
# File lib/aozora2html.rb, line 860
def push_block_tag(tag, closing)
  push_char(tag)
  closing.concat(tag.close_tag)
end
push_char(char) click to toggle source
# File lib/aozora2html.rb, line 488
def push_char(char)
  @ruby_buf.push_char(char, @buffer)
end
push_chars(obj) click to toggle source
# File lib/aozora2html.rb, line 473
def push_chars(obj)
  case obj
  when Array
    obj.each do |x|
      push_chars(x)
    end
  when String
    obj.each_char do |x|
      push_char(x)
    end
  else
    push_char(obj)
  end
end
read_accent() click to toggle source
# File lib/aozora2html.rb, line 269
def read_accent
  Aozora2Html::AccentParser.new(@stream, ACCENT_END, @chuuki_table, @images, gaiji_dir: @gaiji_dir).process
end
read_char() click to toggle source

一文字読み込む

# File lib/aozora2html.rb, line 260
def read_char
  @stream.read_char
end
read_line() click to toggle source

一行読み込む

# File lib/aozora2html.rb, line 265
def read_line
  @stream.read_line
end
read_to_nest(endchar) click to toggle source
# File lib/aozora2html.rb, line 273
def read_to_nest(endchar)
  Aozora2Html::TagParser.new(@stream, endchar, @chuuki_table, @images, gaiji_dir: @gaiji_dir).process
end
rearrange_ruby_tag(targets, upper_ruby, under_ruby) click to toggle source

rubyタグの再生成(本体はrearrange_ruby)

complex ruby wrap up utilities – don’t erase! we will use soon …

# File lib/aozora2html.rb, line 1130
def rearrange_ruby_tag(targets, upper_ruby, under_ruby)
  Aozora2Html::Tag::Ruby.rearrange_ruby(self, targets, upper_ruby, under_ruby)
end
recovery_front_reference(reference) click to toggle source

発見した前方参照を元に戻す

@ruby_bufがあれば@ruby_bufに、なければ@bufferにpushする バッファの最後と各要素が文字列ならconcatし、どちらが文字列でなければ(concatできないので)pushする

@return [void]

# File lib/aozora2html.rb, line 623
def recovery_front_reference(reference)
  reference.each do |elt|
    #      if @ruby_buf.protected
    if @ruby_buf.present?
      @ruby_buf.push(elt)
    elsif @buffer.last.is_a?(String)
      if elt.is_a?(String)
        @buffer.last.concat(elt)
      else
        @buffer.push(elt)
      end
    else # rubocop:disable Lint/DuplicateBranch
      @ruby_buf.push(elt)
    end
  end
end
search_front_reference(string) click to toggle source

前方参照の発見 Ruby,style重ねがけ等々のため、要素の配列で返す

前方参照は‘○○[#「○○」に傍点]`、`吹喋[#「喋」に「ママ」の注記]`といった表記

@return [TextBuffer|false]

# File lib/aozora2html.rb, line 557
def search_front_reference(string)
  if string.length == 0
    return false
  end

  searching_buf = if @ruby_buf.present?
                    @ruby_buf.to_a
                  else
                    @buffer
                  end
  last_string = searching_buf.last
  case last_string
  when String
    if last_string == ''
      searching_buf.pop
      search_front_reference(string)
    elsif last_string.match?(Regexp.new("#{Regexp.quote(string)}$"))
      # 完全一致
      # start = match.begin(0)
      # tail = match.end(0)
      # last_string[start,tail-start] = ""
      searching_buf.pop
      searching_buf.push(last_string.sub(Regexp.new("#{Regexp.quote(string)}$"), ''))
      TextBuffer.new([string])
    elsif string.match?(Regexp.new("#{Regexp.quote(last_string)}$"))
      # 部分一致
      tmp = searching_buf.pop
      found = search_front_reference(string.sub(Regexp.new("#{Regexp.quote(last_string)}$"), ''))
      if found
        found.push(tmp)
        found
      else
        searching_buf.push(tmp)
        false
      end
    end
  when Aozora2Html::Tag::ReferenceMentioned
    inner = last_string.target_string
    if inner == string
      # 完全一致
      searching_buf.pop
      TextBuffer.new([last_string])
    elsif string.match?(Regexp.new("#{Regexp.quote(inner)}$"))
      # 部分一致
      tmp = searching_buf.pop
      found = search_front_reference(string.sub(Regexp.new("#{Regexp.quote(inner)}$"), ''))
      if found
        found.push(tmp)
        found
      else
        searching_buf.push(tmp)
        false
      end
    end
  else
    false
  end
end
tail_output() click to toggle source

general_outputのフッタ版

# File lib/aozora2html.rb, line 1290
def tail_output
  @ruby_buf.dump_into(@buffer)
  string = @buffer.join
  @buffer = TextBuffer.new
  string.gsub!('info@aozora.gr.jp', '<a href="mailto: info@aozora.gr.jp">info@aozora.gr.jp</a>')
  string.gsub!('青空文庫(http://www.aozora.gr.jp/)'.to_sjis) { "<a href=\"http://www.aozora.gr.jp/\">#{$&}</a>" }
  if string.match?(%r{(<br />$|</p>$|</h\d>$|<div.*>$|</div>$|^<[^>]*>$)})
    @out.print string, "\r\n"
  else
    @out.print string, "<br />\r\n"
  end
end