class Aozora4Reader
Constants
- KANJIPAT
- MAX_SAGE
- PreambleLineNumber
Public Class Methods
a4r(file)
click to toggle source
# File lib/aozora4reader.rb, line 18
# Convenience entry point: builds a converter for +file+ and immediately
# runs its #main conversion.
#
# @param file [String] path of the input text file, handed to .new unchanged
# @return whatever #main returns
def self.a4r(file)
  converter = new(file)
  converter.main
end
new(file)
click to toggle source
# File lib/aozora4reader.rb, line 22
# Remembers the input path and resets every piece of per-conversion state.
#
# @param file [String] path of the input file (stored as @inputfile_name)
def initialize(file)
  @inputfile_name = file

  # Progress / diagnostics bookkeeping.
  @line_num = 0
  @log_text = []

  # Gaiji lookup tables; they start empty and are filled by #load_gaiji.
  @gaiji  = {}
  @gaiji2 = {}

  # Layout flags toggled while the body text is converted.
  @jisage      = false
  @sayuuchuuou = false
end
Public Instance Methods
load_gaiji()
click to toggle source
外字用ハッシュを作成
# File lib/aozora4reader.rb, line 333
# Fills the gaiji lookup tables (@gaiji and @gaiji2) from the whitespace
# separated "key value" files gaiji.txt and gaiji2.txt.
#
# Fixes over the previous version:
# * the trailing "#..." comment on a gaiji2.txt value is now really removed
#   (the old code called the non-destructive String#gsub and threw the
#   result away, so the comment stayed in the table);
# * a gaiji2.txt line that has a key but no value no longer raises
#   NoMethodError on nil;
# * blank lines are skipped instead of being stored under a nil key.
#
# @param datadir [String] directory holding gaiji.txt and gaiji2.txt;
#   defaults to the "../data" directory next to this source file, which is
#   the location the method always used before
def load_gaiji(datadir = File.dirname(__FILE__) + "/../data")
  File.open(File.join(datadir, "gaiji.txt"), "r:UTF-8") do |f|
    f.each_line do |entry|
      # String#split without arguments also drops the trailing newline.
      key, data = entry.split
      next if key.nil?

      @gaiji[key] = data
    end
  end

  File.open(File.join(datadir, "gaiji2.txt"), "r:UTF-8") do |f|
    f.each_line do |entry|
      key, data = entry.split
      next if key.nil?

      # Strip a comment glued directly onto the value ("value#comment").
      @gaiji2[key] = data && data.sub(/#.*$/, '')
    end
  end
end
main()
click to toggle source
メインパート
# Main conversion routine: turns the input text file into a LaTeX (.tex) file.
#
# Steps, in the order the code performs them:
# 1. Calls load_gaiji to fill the gaiji tables.
# 2. Derives the output name by replacing a trailing ".txt" of @inputfile_name
#    with ".tex"; opens the input with "r:SJIS" and the output with "w:UTF-8".
# 3. Header: reads lines (each passed through NKF::nkf('-wS', ...)) until two
#    empty lines have been seen or the closing dashed line of a note block is
#    reached; lines between the dashed lines are skipped.  The collected lines
#    become @title plus, depending on how many remain, @subtitle / @author /
#    @subauthor / @meta_data.
# 4. Writes preamble(), \title, \subtitle, \author, \subauthor, \metadata
#    entries and \date{}, then \begin{document}\maketitle.
# 5. Body: rewrites every line through a long, order-dependent chain of
#    annotation substitutions (page breaks, gaiji, ruby, emphasis marks,
#    indentation, headings, ...) and prints normalize(line); stops at the first
#    line that starts with 底本.
# 6. Colophon: writes postamble(), the 底本 line and every remaining line, then
#    closes minipage / teihon / document and finally appends the queued
#    @log_text messages.
#
# NOTE(review): neither inputfile nor outputfile is closed explicitly in this
# listing.
# NOTE(review): `inputfile.gets.chomp` in the header loop and
# normalize(meta_data.shift) for the title would raise on nil if the input
# ends early or has no header lines - confirm inputs always have a header.
# NOTE(review): this listing is collapsed onto a few physical lines and its
# annotation delimiters look width-normalized (ASCII `[#...]`, `/\`, `|`,
# digits where the Aozora format uses full-width characters); several regexes
# below are not valid or not meaningful as shown (e.g. `//\/`).  Confirm
# against the repository source before editing any pattern.
# File lib/aozora4reader.rb, line 357 def main load_gaiji() # 入出力ファイルの定義 outputfile_name = @inputfile_name.sub(/\.txt$/, ".tex") inputfile = File.open(@inputfile_name, "r:SJIS") outputfile = File.open(outputfile_name, "w:UTF-8") # プリアンブルの処理 empty_line = 0 in_note = false meta_data = [] while empty_line < 2 line = inputfile.gets.chomp line = NKF::nkf('-wS', line) if in_note if line =~ /^-+$/ in_note = false break end else if line =~ /^-+$/ in_note = true else if line =~ /^$/ empty_line += 1 else if line =~ /《.*?》/ translate_ruby(line) end meta_data << line end end end end @line_num += meta_data.size @title = normalize(meta_data.shift) case meta_data.size when 1 @author = normalize(meta_data.shift) when 2 @subtitle = normalize(meta_data.shift) @author = normalize(meta_data.shift) when 3 @subtitle = normalize(meta_data.shift) @author = normalize(meta_data.shift) @subauthor = normalize(meta_data.shift) else @subtitle = normalize(meta_data.shift) @meta_data = [] until meta_data.empty? @meta_data << normalize(meta_data.shift) end @subauthor = @meta_data.pop @author = @meta_data.pop end outputfile.write(preamble()) outputfile.print "\\title{"+@title+"}\n" outputfile.print "\\subtitle{"+@subtitle+"}\n" if @subtitle outputfile.print "\\author{"+@author+"}\n" outputfile.print "\\subauthor{"+@subauthor+"}\n" if @subauthor if @meta_data @meta_data.each do |data| outputfile.print "\\metadata{"+data+"}\n" end end outputfile.print "\\date{}\n" # 本文の処理 outputfile.print "\\begin{document}\n\\maketitle\n" @line_num += PreambleLineNumber while line = inputfile.gets @line_num += 1 line.chomp! line = NKF::nkf('-wS', line) break if line =~ /^底本/ if line =~ /^ 「/ line.sub!(/^ 「/, "\\mbox{ }\\kern0mm\\inhibitglue「") end if line =~ /[ワヰヱヲ]゛/ line.gsub!(/ワ゛/, "\\ajLig{ワ゛}") line.gsub!(/ヰ゛/, "\\ajLig{ヰ゛}") line.gsub!(/ヱ゛/, "\\ajLig{ヱ゛}") line.gsub!(/ヲ゛/, "\\ajLig{ヲ゛}") end if line =~ /[?!] 
/ line.gsub!(/([?!]) /, '\1{}') end if line =~ /——/ line.gsub!(/——/, "\\——{}") end if line =~ //\/ line.gsub!(//\/, "\\ajKunoji{}") end if line =~ //″\/ line.gsub!(//″\/, "\\ajDKunoji{}") end =begin if line =~ /^ +.+/ line.gsub!(/^ +([一二三四五六七八九〇十].*)/, '\\section*{\1}') end =end while line =~ /(.+?)[#(「\1」は横?[1|一]文字[^]]*?)]/ line = line.sub(/(.+?)[#(「\1」は横?[1|一]文字[^]]*?)]/){"\\ajLig{"+to_single_byte($1)+"}"} end if line =~ /[#改丁.*?]/ if @sayuuchuuou line = "\\vspace*{\\stretch{1}}" + line @sayuuchuuou = false end line.sub!(/[#改丁.*?]/, "\\cleardoublepage") end if line =~ /[#改[頁|ページ].*?]/ if @sayuuchuuou line = "\\vspace*{\\stretch{1}}" + line @sayuuchuuou = false end line.sub!(/[#改[頁|ページ].*?]/, "\\clearpage") end if line =~ /〔.*?〕/ translate_accent(line) end if line =~ /※/ translate_gaiji(line) end if line =~ /《.*?》/ translate_ruby(line) end if line =~ /[#(.+?)傍点]/ translate_bouten(line) end if line =~ /[#傍点].+?[#傍点終わり]/ translate_bouten(line) end if line =~ /[#「(.+?)」に(?:二重)?[傍鎖破波]線]/ translate_bousen(line) end if line =~ /[#この行.*?([1234567890一二三四五六七八九〇十]*)字下げ]/ outputfile.print "\\begin{jisage}{"+to_single_byte($1)+"}\n" line = line.sub(/[#この行.*?字下げ]/, "")+"\n\\end{jisage}" @line_num += 2 end if line =~ /[#ここから地から.+字上げ]/ line.sub!(/[#ここから地から([1234567890一二三四五六七八九〇十]*)字上げ]/){"\\begin{flushright}\\advance\\rightskip"+to_single_byte($1)+"zw"} @jisage = true end if line =~ /[#ここで字上げ終わり]/ line.sub!(/[#ここで字上げ終わり]/){"\\end{flushright}"} @jisage = false end if line =~ /[#ここから改行天付き、折り返して.*?字下げ]/ if @jisage outputfile.print "\\end{jisage}\n" @line_num += 1 end line.sub!(/[#ここから改行天付き、折り返して([1234567890一二三四五六七八九〇十]*)字下げ]/){"\\begin{jisage}{#{to_single_byte($1)}}\\setlength\\parindent{-"+to_single_byte($1)+"zw}"} @jisage = true end if line =~ /[#.*?字下げ[^]]*?(?:終わり|まで)[^]]*?]/ line = line.sub(/[#.*?字下げ.*?(?:終わり|まで).*?]/, "")+"\\end{jisage}" @jisage = false end if line =~ /[#(ここから|これより|ここより|以下).+字下げ.*?]/ if @jisage outputfile.print "\\end{jisage}\n" @line_num += 1 end 
line.sub!(/[#(ここから|これより|ここより|以下).*?([1234567890一二三四五六七八九〇十]*)字下げ.*?]/){"\\begin{jisage}{"+to_single_byte($2)+"}"} @jisage = true end if line =~ /^[#ここから地付き]$/ @jisage = true line = "\\begin{flushright}" end if line =~ /^[#ここで地付き終わり]$/ line = "\\end{flushright}" @jisage = false end if line =~ /[#.*?地付き.*?]$/ line = "\\begin{flushright}\n"+line.sub(/[#.*?地付き.*?]$/, "\\end{flushright}") @line_num += 1 elsif line =~ /[#.*?地付き.*?]/ line = line.sub(/[#.*?地付き.*?]/, "\\begin{flushright}\n")+"\\end{flushright}" @line_num += 1 end if line =~ /[#.*?(?:行末|地)(?:から|より).*?([1234567890一二三四五六七八九〇十]*)字上.*?]$$/ line = "\\begin{flushright}\\advance\\rightskip"+to_single_byte($1)+"zw\n"+line.sub(/[#.*?(?:行末|地)(?:から|より).*?字上.*?]$/, "\\end{flushright}") @line_num += 1 elsif line =~ /^(.*?)[#.*?(?:行末|地)(?:から|より).*?([1234567890一二三四五六七八九〇十]*)字上.*?](.*)$/ line = $1+"\\begin{flushright}\\advance\\rightskip"+to_single_byte($2)+"zw\n"+$3+"\\end{flushright}" @line_num += 1 end if line =~ /[#「.+?」は返り点]/ line.gsub!(/(.+)[#「\1」は返り点]/, '\\kaeriten{\ajKunten{\1}}') end if line =~ /[#[一二三上中下甲乙丙丁レ]*]/ line.gsub!(/[#([一二三上中下甲乙丙丁レ]*)]/, '\\kaeriten{\ajKunten{\1}}') end if line =~ /[#(.*?)]/ line.gsub!(/[#((.*?))]/, '\\okurigana{\1}') end if line =~ /[#「.+?」.*?ママ.*?注記]/ line.gsub!(/(.+)[#「\1」.*?ママ.*?注記]/, '\\ruby{\1}{ママ}') end if line =~ /[#[^]]+(([^)]+.png).*?)[^]]+]/ line.gsub!(/[#[^]]+(([^)]+.png).*?)[^]]+]/, '\\sashie{\1}') end if line =~ /[#([1234567890一二三四五六七八九〇十]*)字下げ]/ num = to_single_byte($1).to_i if num > MAX_SAGE num = MAX_SAGE end outputfile.print "\\begin{jisage}{#{num}}\n" line = line.sub(/[#.*?字下げ]/, "")+"\n\\end{jisage}" end ## ちょっと汚いけど二重指定の対策 if line =~ /[#「(.*?)」は縦中横][#「(.*?)」は中見出し]/ line.gsub!(/(.*?)[#「(\1)」は縦中横][#「(\1)」は中見出し]/){"{\\large \\rensuji{#{$1}}}"} end if line =~ /[#「(.*?)」は大見出し]/ line.gsub!(/(.*?)[#「(.*?)」は大見出し]/){"{\\Large #{$1}}"} end if line =~ /[#「(.*?)」は中見出し]/ line.gsub!(/(.*?)[#「(.*?)」は中見出し]/){"{\\large #{$1}}"} end if line =~ /[#「(.*?)」は小見出し]/ 
line.gsub!(/(.*?)[#「(.*?)」は小見出し]/){"{\\gtfamily #{$1}}"} end if line =~ /[#小見出し](.*?)[#小見出し終わり]/ line.gsub!(/[#小見出し](.*?)[#小見出し終わり]/){"{\\gtfamily #{$1}}"} end if line =~ /[#中見出し](.*?)[#中見出し終わり]/ line.gsub!(/[#中見出し](.*?)[#中見出し終わり]/){"{\\large #{$1}}"} end if line =~ /[#大見出し](.*?)[#大見出し終わり]/ line.gsub!(/[#大見出し](.*?)[#大見出し終わり]/){"{\\Large #{$1}}"} end if line =~ /[#ここから中見出し]/ line.gsub!(/[#ここから中見出し]/){"{\\large"} end if line =~ /[#ここで中見出し終わり]/ line.gsub!(/[#ここで中見出し終わり]/){"}"} end if line =~ /[#ページの左右中央]/ line.gsub!(/[#ページの左右中央]/, "\\vspace*{\\stretch{1}}") @sayuuchuuou = true end ## XXX 字詰めは1行の文字数が少ないので無視 if line =~ /[#ここから([1234567890一二三四五六七八九〇十]*)字詰め]/ line.gsub!(/[#ここから([1234567890一二三四五六七八九〇十]*)字詰め]/, "") end if line =~ /[#ここで字詰め終わり]/ line.gsub!(/[#ここで字詰め終わり]/, "") end # XXX 割り注も無視 if line =~ /[#ここから割り注]/ line.gsub!(/[#ここから割り注]/, "") end if line =~ /[#ここで割り注終わり]/ line.gsub!(/[#ここで割り注終わり]/, "") end if line =~ /[#「(.*?)」は太字]/ line.gsub!(/(.+)[#「\1」は太字]/,'{\\textbf{\1}}') end if line =~ /[#「.+?」は縦中横]/ line.gsub!(/(.+)[#「\1」は縦中横]/, '\\rensuji{\1}') end if line =~ /[#「(1)(/)(\d+)」は分数]/ bunshi = to_single_byte($1) bunbo = $3 line.gsub!(/(.+)[#「.+?」は分数]/, "\\rensuji{#{bunshi}/#{bunbo}}") end if line =~ /[#「.+?」は罫囲み]/ line.gsub!(/(.+)[#「\1」は罫囲み]/, '\\fbox{\1}') end if line =~ /[#「(.+?)」は(本文より)?([123456])段階大きな文字]/ line.gsub!(/([^[]+?)[#「\1」は(本文より)?([123456])段階大きな文字]/) { num = to_single_byte($3).to_i case num when 1 "{\\large #{$1}}" when 2 "{\\Large #{$1}}" when 3 "{\\LARGE #{$1}}" when 4 "{\\huge #{$1}}" when 5 "{\\Huge #{$1}}" when 6 "{\\Huge #{$1}}" end } end if line =~ /[#「.+?」は斜体]/ line.gsub!(/(.+)[#「\1」は斜体]/){ shatai = to_single_byte($1).tr("abcdefghijklmnopqrstuvwxyz","abcdefghijklmnopqrstuvwxyz") "\\rensuji{\\textsl{"+shatai+"}}" } end if line =~ /[#「[0-90-9]」は下付き小文字]/ line.gsub!(/([A-Za-za-zA-Zαβδγ])([0-90-9])[#「\2」は下付き小文字]/){ "$"+$1+"_{"+to_single_byte($2)+"}$" } end if line =~ /([^ ]*)[#ゴシック体]$/ line.gsub!(/([^ ]*)[#ゴシック体]/){"{\\gtfamily #{$1}}"} end if line =~ 
/[#「.+?」はゴシック体]/ line.gsub!(/(.+?)[#「\1」はゴシック体]/){"{\\gtfamily #{$1}}"} end if line =~ /[#ここから横組み](.*?)[#ここで横組み終わり]/ line.gsub!(/[#ここから横組み](.*?)[#ここで横組み終わり]/){ yoko_str = $1 yoko_str.gsub!(/π/,"\\pi ") yoko_str.gsub!(/=/,"=") yoko_str.gsub!(/(\d+)[#「\1」は指数]/){"^{#{$1}}"} "$"+yoko_str+"$" } end line.tr!("┌┐┘└│─┏┓┛┗┃━→","┐┘└┌─│┓┛┗┏━┃↓") if line =~ /[#改段]/ line.sub!(/[#改段]/, "\\clearpage") end if line =~ /[aioeu]\^/i line.gsub!(/([aioeu])\^/i){ "\\\^{#{$1}}"} end if line =~ /[aioeu]\'/i line.gsub!(/([aioeu])\'/i){ "\\\'{#{$1}}"} end if line =~ /[#天から.*?([1234567890一二三四五六七八九〇十]*)字下げ]/ num = to_single_byte($1).to_i if num > MAX_SAGE num = MAX_SAGE end outputfile.print "\\begin{jisage}{#{num}}\n" line = line.sub(/[#天から.*?字下げ]/, "")+"\n\\end{jisage}" end line.gsub!(/[#図形 □(四角)に内接する◆]/, '{\setlength{\fboxsep}{0pt}\fbox{◆}}') if line =~ /[#[^]]+?]/ line.gsub!(/[#([^]]+?)]/, '\\endnote{\1}') end if line =~ /\\[a-z]*?bou/ tuning_bou(line) end if line =~ /\\ajD?Kunoji\{\}\}/ line.gsub!(/(\\ajD?Kunoji)\{\}\}/, '\1}') end if line =~ /\\ruby/ tuning_ruby(line) end if line =~ /^$/ line = " " end outputfile.print normalize(line)+"\n" end # 底本の処理 outputfile.write(postamble()) outputfile.print normalize(line)+"\n" while line = inputfile.gets line.chomp! line = NKF::nkf('-wS', line) outputfile.print normalize(line)+"\n" end outputfile.print "\n\\end{minipage}\n\\end{teihon}\n\\end{document}\n" if @log_text.size > 0 until @log_text.empty? outputfile.print @log_text.shift end end end
normalize(l)
click to toggle source
UTF-8で出力
# File lib/aozora4reader.rb, line 34
# Escapes the characters "#" and "_" for LaTeX output.
#
# The argument is modified in place and also returned, so callers may use
# either the return value or the mutated string.
#
# @param l [String] a mutable line of text
# @return [String] the same string with "#" -> "\#" and
#   "_" -> "\textunderscore{}"
def normalize(l)
  # Escaping of "&" is deliberately disabled: l.gsub!(/&/, '\\\\&')
  escapes = [
    [/#/, '\\#'],
    [/_/, '\\textunderscore{}']
  ]
  escapes.each { |pattern, replacement| l.gsub!(pattern, replacement) }
  l.to_s
end
postamble()
click to toggle source
底本の表示用
# Returns the fixed LaTeX fragment that main writes just before the colophon
# (底本) lines: it emits \theendnotes, opens the teihon environment, starts a
# new empty-style page and opens a minipage set in \scriptsize.
# The fragment only opens teihon and minipage; main prints the matching
# \end{minipage} and \end{teihon} after the colophon lines.
# Takes no arguments and reads no instance state.
# NOTE(review): this listing is collapsed onto one line, so the heredoc's real
# line breaks are not visible here - confirm against the repository source.
# File lib/aozora4reader.rb, line 105 def postamble str = <<"END_OF_POST" \\theendnotes \\begin{teihon} \\clearpage\\null\\newpage\\thispagestyle{empty} \\begin{minipage}<y>{\\textheight} \\vspace{1\\baselineskip} \\scriptsize END_OF_POST str end
preamble()
click to toggle source
プリアンブルの出力
# Returns the LaTeX document preamble as a string: the tbook document class,
# the otf / atbegshi / hyperref / furikana / type1cm / aozora4reader packages
# and the \rubykatuji definition.
# @title and @author are passed through remove_ruby first, and the results are
# interpolated into hyperref's pdftitle / pdfauthor options.
# Must therefore be called after @title and @author have been set (main does so).
# NOTE(review): this listing is collapsed onto one line; the heredoc contains
# LaTeX "%" comments, so its real line breaks are significant but not visible
# here - confirm against the repository source before editing.
# File lib/aozora4reader.rb, line 72 def preamble title = remove_ruby(@title) author = remove_ruby(@author) str = <<"END_OF_PRE" \\documentclass[a5paper]{tbook} %\\documentclass[a5paper, twocolumn]{tbook} %\\usepackage[deluxe]{otf} \\usepackage[expert, deluxe]{otf} %\\usepackage{utf} % Bookmarkの文字化け対策(日本語向け) \\ifnum 46273=\\euc"B4C1 % 46273 == 0xB4C1 == 漢(EUC-JP) \\usepackage{atbegshi}% \\AtBeginShipoutFirst{\\special{pdf:tounicode EUC-UCS2}}% \\else \\usepackage{atbegshi}% \\AtBeginShipoutFirst{\\special{pdf:tounicode 90ms-RKSJ-UCS2}}% \\fi \\usepackage[dvipdfm,bookmarks=false,bookmarksnumbered=false,hyperfootnotes=false,% pdftitle={#{title}},% pdfauthor={#{author}}]{hyperref} \\usepackage{furikana} \\usepackage{type1cm} \\usepackage[size=large]{aozora4reader} \\def\\rubykatuji{\\rubyfamily\\tiny} %\\def\\rubykatuji{\\tiny}%for UTF package %\\renewenvironment{teihon}{\\comment}{\\endcomment} END_OF_PRE str end
remove_ruby(str)
click to toggle source
ルビの削除(表題等)
# File lib/aozora4reader.rb, line 67
# Strips ruby markup (used for titles and the like): every
# \ruby{base}{reading} - matched case-insensitively - is reduced to its base
# text.
#
# @param str [String] text that may contain \ruby{...}{...} commands
# @return [String] a new string; +str+ itself is left untouched
def remove_ruby(str)
  ruby_command = /\\ruby{([^}]+)}{[^}]*}/i
  str.gsub(ruby_command, '\1')
end
to_single_byte(str)
click to toggle source
全角→半角
# File lib/aozora4reader.rb, line 42
# Full-width -> half-width conversion for numbers and "!" / "?".
#
# * Full-width digits (０-９) become ASCII digits; otherwise kanji numerals
#   (一二三四五六七八九〇) become ASCII digits.
# * A remaining 十 ("ten") is folded into positional notation:
#   二十五 -> 25, 二十 -> 20, 十五 -> 15, 十 -> 10.
# * Full-width ！ and ？ become ASCII ! and ?.
#
# Fixes over the previous listing:
# * the source sets of the tr! calls were ASCII, which made the full-width
#   conversion a no-op; they are full-width again;
# * the "digit + 十" case used the replacement '\{1}0', which is not a
#   backreference in Ruby and produced the literal text "\{1}0"; a block
#   now builds "<digit>0".
#
# @param str [String] text to convert (not modified)
# @return [String] converted copy
def to_single_byte(str)
  s = str.dup

  if s =~ /[０-９]/
    s.tr!("１２３４５６７８９０", "1234567890")
  elsif s =~ /[一二三四五六七八九〇]/
    s.tr!("一二三四五六七八九〇", "1234567890")
  end

  # \d only matches ASCII digits, i.e. the digits produced just above.
  case s
  when /\d十\d/
    s.sub!(/(\d)十(\d)/, '\1\2')
  when /\d十/
    # Block form: a replacement string cannot say "group 1 followed by 0"
    # unambiguously.
    s.sub!(/(\d)十/) { "#{Regexp.last_match(1)}0" }
  when /十\d/
    s.sub!(/十(\d)/, '1\1')
  when /十/
    s.sub!(/十/, "10")
  end

  s.tr!("！？", "!?")
  s
end
translate_accent(l)
click to toggle source
アクセントの処理用 www.aozora.gr.jp/accent_separation.html cosmoshouse.com/tools/acc-conv-j.htm
# File lib/aozora4reader.rb, line 121
# Converts Aozora accent-separation notation (used inside 〔...〕) into LaTeX
# accent commands, then removes the 〔 and 〕 markers themselves.
#
# The rules are applied one after another, in the order listed, each over the
# whole line; the line is modified in place.
#
# @param l [String] mutable line of text
# @return [String] the same (mutated) string
def translate_accent(l)
  rules = [
    [/([ij]):/,                ->(m) { "\\\"{\\#{m[1]}}" }],    # i: -> \"{\i}
    [/([AIOEUaioeu])(['`~^])/, ->(m) { "\\#{m[2]}{#{m[1]}}" }], # e' -> \'{e}
    [/([AIOEUaioeu]):/,        ->(m) { "\\\"{#{m[1]}}" }],      # u: -> \"{u}
    [/([AIOEUaioeu])_/,        ->(m) { "\\={#{m[1]}}" }],       # a_ -> \={a}
    [/([!?])@/,                ->(m) { "#{m[1]}'" }],
    [/([Aa])&/,                ->(m) { "\\r{#{m[1]}}" }],
    [/AE&/,                    ->(_m) { "\\AE{}" }],
    [/ae&/,                    ->(_m) { "\\ae{}" }],
    [/OE&/,                    ->(_m) { "\\OE{}" }],
    [/oe&/,                    ->(_m) { "\\oe{}" }],
    [/s&/,                     ->(_m) { "\\ss{}" }],
    [/([cC]),/,                ->(m) { "\\c{#{m[1]}}" }]
  ]

  rules.each do |pattern, render|
    l.gsub!(pattern) { render.call(Regexp.last_match) }
  end

  # Drop the accent-section brackets.
  l.delete!("〔〕")
  l
end
translate_bousen(l)
click to toggle source
傍線の処理用
# File lib/aozora4reader.rb, line 282
# Converts side-line annotations into \bousen{...}.
#
# An annotation has the form  TEXT［＃「TEXT」に傍線］  (also 二重傍線, 鎖線,
# 破線 and 波線); the annotated TEXT directly precedes the annotation and is
# repeated inside the corner brackets.  Every line style is rendered with the
# same \bousen macro.
#
# Fixes over the previous listing:
# * the annotation delimiters were ASCII "[#" and "]", which turned each
#   pattern into a character class matching ordinary characters such as "."
#   or "に"; they are the full-width ［＃ ... ］ again;
# * the five copy-pasted branches are folded into one loop (same order).
#
# @param l [String] mutable line of text, modified in place
# @return [String] the same string
def translate_bousen(l)
  %w[傍線 二重傍線 鎖線 破線 波線].each do |line_style|
    # \1 inside the pattern forces the quoted text to equal the text that
    # immediately precedes the annotation.
    l.gsub!(/(.+?)［＃「\1」に#{line_style}］/, '\\bousen{\1}')
  end
  l
end
translate_bouten(l)
click to toggle source
傍点の処理用
# File lib/aozora4reader.rb, line 248
# Converts emphasis-dot (傍点) annotations into the matching LaTeX macro.
#
# Two notations are handled:
# * TEXT［＃「TEXT」に<kind>］ where <kind> is one of the names in
#   bouten_list below; the macro used is the second element of the pair
#   (e.g. 白丸傍点 -> \siromarubou{TEXT});
# * ［＃傍点］TEXT［＃傍点終わり］ -> \bou{TEXT}.
#
# Inside the emphasised text, \UTF{...} and \ruby{...}{...} commands are
# wrapped in an extra pair of braces.
#
# Fixes over the previous listing:
# * the annotation delimiters were ASCII "[#" and "]", which turned the
#   patterns into character classes matching ordinary text; they are the
#   full-width ［＃ ... ］ again;
# * the brace-wrapping code, previously duplicated, lives in one lambda.
#
# @param l [String] mutable line of text, modified in place
# @return [String] the same string
def translate_bouten(l)
  bouten_list = [
    ["傍点", "bou"],
    ["白ゴマ傍点", "sirogomabou"],
    ["丸傍点", "marubou"],
    ["白丸傍点", "siromarubou"],
    ["黒三角傍点", "kurosankakubou"],
    ["白三角傍点", "sirosankakubou"],
    ["二重丸傍点", "nijyuumarubou"],
    ["蛇の目傍点", "jyanomebou"]
  ]

  # Builds \macro{text}, protecting nested \UTF / \ruby commands with braces.
  emphasize = lambda do |macro, text|
    guarded = text.gsub(/(\\UTF{.+?})/, '{\1}')
                  .gsub(/(\\ruby{.+?}{.+?})/i, '{\1}')
    "\\#{macro}{#{guarded}}"
  end

  bouten_list.each do |name, fun|
    # Only rewrite when a plain ［＃「...」に<name>］ annotation is present.
    next unless l =~ /［＃「.+?」に#{name}］/

    l.gsub!(/(.+?)［＃.*?「\1」に#{name}］/) do
      emphasize.call(fun, Regexp.last_match(1))
    end
  end

  l.gsub!(/［＃傍点］(.+?)［＃傍点終わり］/) do
    emphasize.call("bou", Regexp.last_match(1))
  end

  l
end
translate_gaiji(l)
click to toggle source
外字の処理用
# Rewrites gaiji (external character) annotations, which start with ※, in the
# line +l+.  The line is modified in place and returned.
#
# Steps, in the order the code performs them:
# 1. Annotations are looked up in @gaiji2, using the captured annotation text
#    as the key (three pattern variants, including one for an annotation
#    nested inside another).
# 2. An annotation that contains a nested annotation with a code of the form
#    [12]-d-d becomes "※\footnote{...}" with the code looked up in @gaiji.
# 3. A simple annotation with such a code is replaced by its @gaiji entry.
# 4. Special cases: voiced katakana ワ/ヰ/ヱ/ヲ -> \ajLig{..゛}; the triple
#    exclamation mark -> \rensuji{!!!}; an annotation naming a .png file ->
#    \includegraphics{file}.
# 5. Any annotation still left becomes "※\footnote{annotation text}".
# 6. If a ※ is still present, a warning goes to STDERR and a normalize()d
#    message is appended to @log_text.
#
# NOTE(review): in steps 1 and 3 only the first match is checked against the
# table, but gsub! rewrites every match; a block that returns nil (key not in
# the table) would replace that match with an empty string - confirm.
# NOTE(review): step 2 concatenates @gaiji[$3] into a string; a missing code
# would raise TypeError - confirm the table always covers it.
# NOTE(review): the .png replacement string is built from the $1 of the
# preceding =~ test, so every annotation on the line gets the same file name.
# NOTE(review): this listing is collapsed onto one line and the annotation
# delimiters look width-normalized (ASCII "[#" / "]" where the Aozora format
# uses full-width brackets); as shown, sequences such as "[^]]" do not express
# "anything but a closing bracket" - confirm against the repository source.
# File lib/aozora4reader.rb, line 141 def translate_gaiji(l) if l =~/※[#([^]]*)、([^、]]*)]/ if @gaiji2[$1] l.gsub!(/※[#([^]]*)、([^、]]*)]/){@gaiji2[$1]} end end ## ※[#「姉」の正字、「女+※[#第3水準1-85-57]のつくり」、256-下-16] if l =~/※[#([^]]*※[#[^]]*][^]]*)、([^、]]*)]/ if @gaiji2[$1] l.gsub!(/※[#([^]]*※[#[^]]*][^]]*)、([^、]]*)]/){@gaiji2[$1]} end end ## ※[#「さんずい+闊」] if l =~ /※[#「([^]]+?)」]/ if @gaiji2[$1] l.gsub!(/※[#「([^]]+?)」]/){@gaiji2[$1]} end end if l =~ /※[#[^]]*?※[#[^]]*?[12]\-\d{1,2}\-\d{1,2}[^]]*?][^]]*?]/ l.gsub!(/※[#([^]]*?)※[#([^]]*?([12]\-\d{1,2}\-\d{1,2})[^]]*?)]([^]]*?)]/){"※\\footnote{#$1"+@gaiji[$3]+"#$4}"} end if l =~ /※[#[^]]*?([12]\-\d{1,2}\-\d{1,2})[^]]*?]/ if @gaiji[$1] l.gsub!(/※[#([^]]*?([12]\-\d{1,2}\-\d{1,2})[^]]*?)]/){@gaiji[$2]} end end if l =~ /※[#濁点付き片仮名([ワヰヱヲ])、.*?]/ l.gsub!(/※[#濁点付き片仮名([ワヰヱヲ])、.*?]/){ "\\ajLig{#{$1}゛}"} end if l =~ /※[#感嘆符三つ.*]/ l.gsub!(/※[#感嘆符三つ.*?]/){ "\\rensuji{!!!}"} end if l =~ /※[#.*?([A-Za-z0-9_]+\.png).*?]/ l.gsub!(/※[#([^]]+?)]/, "\\includegraphics{#{$1}}") end if l =~ /※[#[^]]+?]/ l.gsub!(/※[#([^]]+?)]/, '※\\footnote{\1}') end if l =~ /※/ STDERR.puts("Remaining Unprocessed Gaiji Character in Line #@line_num.") @log_text << normalize("未処理の外字が#{@line_num}行目にあります.\n") end return l end
translate_ruby(l)
click to toggle source
ルビの処理用
# Converts Aozora ruby notation  base《reading》  in the line +l+ into
# \ruby{base}{reading}.  The line is modified in place and returned.
#
# Steps, in the order the code performs them:
# 1. A gaiji footnote (※\footnote{...}) inside the base text is moved behind
#    the ruby; the inline comment notes that this changes the order.
# 2. An editorial annotation inside the base text is moved behind the ruby.
# 3. An editorial annotation inside the reading is moved behind the ruby.
# 4. Ruby whose base is marked explicitly with a leading vertical bar.
# 5. Ruby whose base is a run of KANJIPAT characters or \UTF{..} / \CID{..}.
# 6. Ruby on hiragana, 7. on katakana, 8. on the alphabet classes listed in
#    the pattern, 9. on half-width alphanumerics and a few symbols.
# 10. If a 《...》 is still left, a warning goes to STDERR and a normalize()d
#     message is appended to @log_text.
#
# KANJIPAT is a constant of the class; its definition is not part of this
# listing.
#
# NOTE(review): "[^(?:\\footnote)]+" is a character class, i.e. "any character
# not in this set of letters and symbols", not "anything except the word
# \footnote" - confirm that this is intended.
# NOTE(review): this listing is collapsed onto one line and looks
# width-normalized: the bar test appears as /|/ (an empty alternation, which
# matches at every position) and the annotation brackets appear as ASCII
# "[#" / "]".  Confirm the full-width originals against the repository source
# before editing any pattern.
# File lib/aozora4reader.rb, line 191 def translate_ruby(l) # 被ルビ文字列内に外字の注記があるばあい,ルビ文字列の後ろに移動する # ただし,順番が入れ替わってしまう while l =~ /※\\footnote\{[^(?:\\footnote)]+\}(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?《.+?》/ l.sub!(/(※)(\\footnote\{[^(?:\\footnote)]+\})((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?《.+?》)/, '\1\3\2') end # 被ルビ文字列内に誤記などの注記が存在する場合は、ルビの後ろに移動する while l =~ /(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?[#[^]]*?](?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?《.+?》/ l.sub!(/((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?)([#[^]]*?])((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?《.+?》)/, '\1\3\2') end # ルビ文字列内に誤記などの注記が存在する場合は、ルビの後ろに移動する if l =~ /《[^》]*?[#[^]]*?][^》]*?》/ l.gsub!(/(《[^》]*?)([#[^]]*?])([^》]*?》)/, '\1\3\2') end # 一連のルビの処理 # 1 縦棒ありの場合 if l =~ /|/ l.gsub!(/|(.+?)《(.+?)》/, '\\ruby{\1}{\2}') end # 2 漢字および外字 if l =~ /(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?《.+?》/ l.gsub!(/((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?)《(.+?)》/, '\\ruby{\1}{\2}') end # 3 ひらがな if l =~ /[あ-ん](?:[ぁ-んーヽヾ]|\\CID\{12107\})*?《.+?》/ l.gsub!(/([あ-ん](?:[ぁ-んーヽヾ]|\\CID\{12107\})*?)《(.+?)》/, '\\ruby{\1}{\2}') end # 4 カタカナ if l =~ /[ア-ヴ](?:[ァ-ヴーゝゞ]|\\CID\{12107\})*?《.+?》/ l.gsub!(/([ア-ヴ](?:[ァ-ヴーゝゞ]|\\CID\{12107\})*?)《(.+?)》/, '\\ruby{\1}{\2}') end # 5 全角アルファベットなど if l =~ /[A-Za-zΑ-Ωα-ωА-Яа-я・]+?《.+?》/ l.gsub!(/([A-Za-zΑ-Ωα-ωА-Яа-я・]+?)《(.+?)》/, '\\ruby{\1}{\2}') end # 6 半角英数字 if l =~ /[A-Za-z0-9#_\-\;\&.\'\^\`\\\{\} ]+?《.+?》/ l.gsub!(/([A-Za-z0-9#_\-\;\&.\'\^\`\\\{\} ]+?)《(.+?)》/, '\\ruby{\1}{\2}') end if l =~ /《.*?》/ STDERR.puts("Unknown ruby pattern found in #@line_num.") @log_text << normalize("未処理のルビが#{@line_num}行目にあります.\n") end return l end
tuning_bou(l)
click to toggle source
傍点の調整
# File lib/aozora4reader.rb, line 321
# Post-processes emphasis-dot macros (\bou{..}, \marubou{..}, ...).
#
# 1. A kunoji repeat mark (\ajKunoji{} / \ajDKunoji{}) inside an emphasis
#    macro is pulled out: the macro is closed before the mark, the mark is
#    renamed to \aj(D)Kunojiwith<prefix>Bou, and the same macro is reopened
#    for the text that followed.  Repeated until no such mark is left.
# 2. Emphasis macros left with an empty argument are reduced to "{}".
#
# @param l [String] mutable line of text, modified in place
# @return [String] the same string
def tuning_bou(l)
  # One "character" of emphasised text: a word character or a \UTF / \CID glyph.
  unit = /(?:\w|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))/.source
  kunoji_inside = /((\\([a-z]*?)bou)\{#{unit}+?)(\\ajD?Kunoji)\{\}(#{unit}*?)\}/

  # gsub! returns nil once nothing matches, which ends the loop.
  loop do
    break unless l.gsub!(kunoji_inside, '\1}\4with\3Bou\2{\5}')
  end

  l.gsub!(/\\([a-z]*?)bou\{\}/, '{}')
  l
end
tuning_ruby(l)
click to toggle source
ルビの調整
# File lib/aozora4reader.rb, line 302
# Adjusts \ruby commands according to their neighbours.  The line is modified
# in place; the method's return value is nil.
#
# 1. \ruby directly preceded by a kanji / \UTF / \CID glyph becomes \Ruby.
# 2. \ruby{base}{reading} directly followed by such a glyph becomes \Ruby.
# 3. While a ruby command is directly followed by a \ruby command, the first
#    such pair is rewritten to \RUBY ... \RUBY.
#
# KANJIPAT is a constant of the class; its definition is not part of this
# listing.
#
# @param l [String] mutable line of text
# @return [nil]
def tuning_ruby(l)
  # A base character: kanji or an escaped glyph.
  glyph = /(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))/.source
  # A reading character: anything but backslash/braces, or an escaped glyph.
  reading = /(?:[^\\\{\}]|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))/.source

  # 1 glyph immediately before the command
  l.gsub!(/(#{glyph})\\ruby/, '\1\\Ruby')

  # 2 glyph immediately after the command
  l.gsub!(/\\ruby(\{#{glyph}+?\}\{#{reading}+?\}#{glyph})/, '\\Ruby\1')

  # 3 consecutive ruby commands
  adjacent = /\\(?:ruby|RUBY|Ruby)(\{#{glyph}+?\}\{#{reading}+?\})\\ruby/
  while l =~ adjacent
    l.sub!(adjacent, '\\RUBY\1\\RUBY')
  end
end