class HtmlCodeCleaner
This class holds a single dictionary that maps HTML character codes to their corresponding symbols. It takes a string as input and replaces every HTML code found in that string with its corresponding symbol. The output is a string.
Public Class Methods
clean_string(string)
click to toggle source
Replaces the HTML character codes in the string with their symbols. Only codes that are defined in the dictionary are replaced.
# File lib/HtmlCodeCleaner.rb, line 10
#
# Replaces HTML character codes in +string+ with the symbols they stand for.
#
# Only codes present in the dictionary built by html_entity_replacements are
# replaced; any other "&...;" sequence is left exactly as it was.
#
# The string is scanned once, so text produced by a replacement is never
# decoded a second time ("&amp;lt;" becomes "&lt;", not "<").
#
# string - the String to clean, or nil.
#
# Returns a new String with the known codes replaced, or nil when +string+
# is nil.
def self.clean_string(string)
  return nil if string.nil?

  replacements = html_entity_replacements
  string.gsub(/&#?[A-Za-z0-9]+;/) { |code| replacements.fetch(code, code) }
end

# Builds (once) and returns the dictionary of HTML code => replacement symbol.
#
# Keys are complete codes such as "&amp;" or "&#38;"; values are UTF-8 strings.
# Every symbol that has an HTML 4 name is registered under both its named and
# its decimal numeric form.
#
# NOTE(review): the dictionary was rebuilt from the standard HTML 4 entity
# names and numbers; confirm the key spelling against the callers' data.
def self.html_entity_replacements
  @html_entity_replacements ||= begin
    to_char = lambda { |codepoint| [codepoint].pack('U') }
    numeric = lambda { |codepoint| '&#' + codepoint.to_s + ';' }

    # name => codepoint for every named code in the dictionary.
    named = {
      'quot' => 34, 'amp' => 38, 'lt' => 60, 'gt' => 62,
      'OElig' => 338, 'oelig' => 339, 'Scaron' => 352, 'scaron' => 353,
      'Yuml' => 376, 'fnof' => 402,
      'thetasym' => 977, 'upsih' => 978, 'piv' => 982,
      'ndash' => 8211, 'mdash' => 8212, 'lsquo' => 8216, 'rsquo' => 8217,
      'sbquo' => 8218, 'ldquo' => 8220, 'rdquo' => 8221, 'bdquo' => 8222,
      'dagger' => 8224, 'Dagger' => 8225, 'bull' => 8226, 'hellip' => 8230,
      'permil' => 8240, 'prime' => 8242, 'Prime' => 8243, 'oline' => 8254,
      'frasl' => 8260, 'euro' => 8364, 'image' => 8465, 'weierp' => 8472,
      'real' => 8476, 'trade' => 8482, 'alefsym' => 8501,
      'larr' => 8592, 'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595,
      'harr' => 8596, 'crarr' => 8629, 'lArr' => 8656, 'uArr' => 8657,
      'rArr' => 8658, 'dArr' => 8659, 'hArr' => 8660,
      'forall' => 8704, 'part' => 8706, 'exist' => 8707, 'empty' => 8709,
      'nabla' => 8711, 'isin' => 8712, 'notin' => 8713, 'ni' => 8715,
      'prod' => 8719, 'sum' => 8721, 'minus' => 8722, 'lowast' => 8727,
      'radic' => 8730, 'prop' => 8733, 'infin' => 8734, 'ang' => 8736,
      'and' => 8743, 'or' => 8744, 'cap' => 8745, 'cup' => 8746,
      'int' => 8747, 'there4' => 8756, 'sim' => 8764, 'cong' => 8773,
      'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801, 'le' => 8804,
      'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836,
      'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855,
      'perp' => 8869, 'sdot' => 8901, 'lceil' => 8968, 'rceil' => 8969,
      'lfloor' => 8970, 'rfloor' => 8971, 'lang' => 9001, 'rang' => 9002,
      'loz' => 9674, 'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829,
      'diams' => 9830
    }

    # Runs of names whose codepoints are consecutive, starting at +first+.
    add_run = lambda do |first, names|
      names.each_with_index { |name, offset| named[name] = first + offset }
    end

    # Latin-1 block, codepoints 160..255 in order.
    add_run.call(160, %w[
      nbsp iexcl cent pound curren yen brvbar sect
      uml copy ordf laquo not shy reg macr
      deg plusmn sup2 sup3 acute micro para middot
      cedil sup1 ordm raquo frac14 frac12 frac34 iquest
      Agrave Aacute Acirc Atilde Auml Aring AElig Ccedil
      Egrave Eacute Ecirc Euml Igrave Iacute Icirc Iuml
      ETH Ntilde Ograve Oacute Ocirc Otilde Ouml times
      Oslash Ugrave Uacute Ucirc Uuml Yacute THORN szlig
      agrave aacute acirc atilde auml aring aelig ccedil
      egrave eacute ecirc euml igrave iacute icirc iuml
      eth ntilde ograve oacute ocirc otilde ouml divide
      oslash ugrave uacute ucirc uuml yacute thorn yuml
    ])

    # Greek capitals (930 is unassigned, hence two runs) and small letters.
    add_run.call(913, %w[
      Alpha Beta Gamma Delta Epsilon Zeta Eta Theta Iota
      Kappa Lambda Mu Nu Xi Omicron Pi Rho
    ])
    add_run.call(931, %w[Sigma Tau Upsilon Phi Chi Psi Omega])
    add_run.call(945, %w[
      alpha beta gamma delta epsilon zeta eta theta iota
      kappa lambda mu nu xi omicron pi rho sigmaf
      sigma tau upsilon phi chi psi omega
    ])

    table = {}

    # Printable ASCII, numeric form only (&#32; .. &#126;).
    (32..126).each { |codepoint| table[numeric.call(codepoint)] = to_char.call(codepoint) }

    # &#128; .. &#159; follow the Windows-1252 layout; the five slots that
    # Windows-1252 leaves undefined (129, 141, 143, 144, 157) become a space.
    windows_1252 = [
      8364, 32, 8218, 402, 8222, 8230, 8224, 8225,
      710, 8240, 352, 8249, 338, 32, 381, 32,
      32, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
      732, 8482, 353, 8250, 339, 32, 382, 376
    ]
    windows_1252.each_with_index do |codepoint, offset|
      table[numeric.call(128 + offset)] = to_char.call(codepoint)
    end

    # Every named symbol is reachable by name and by decimal number.
    named.each do |name, codepoint|
      symbol = to_char.call(codepoint)
      table['&' + name + ';'] = symbol
      table[numeric.call(codepoint)] = symbol
    end

    # Deliberate special cases of this cleaner:
    # a non-breaking space becomes a plain space, a soft hyphen is dropped,
    # and the angle brackets are emitted as U+27E8 / U+27E9.
    { 'nbsp' => ' ', 'shy' => '',
      'lang' => to_char.call(10216), 'rang' => to_char.call(10217) }.each do |name, symbol|
      table['&' + name + ';'] = symbol
      table[numeric.call(named[name])] = symbol
    end

    table.freeze
  end
end