class ScrapeCompanys

Public Class Methods

new() click to toggle source
# File lib/scrapper_bovespa/scrapper.rb, line 45
def initialize()
        @base_query_address = "http://bvmf.bmfbovespa.com.br/pt-br/mercados/acoes/empresas/ExecutaAcaoConsultaInfoEmp.asp?CodCVM="
        @code_source = "http://cvmweb.cvm.gov.br/SWB/Sistemas/SCW/CPublica/CiaAb/FormBuscaCiaAbOrdAlf.aspx?LetraInicial="
end

Public Instance Methods

extract_data_composicao_capital_social(source) click to toggle source
# File lib/scrapper_bovespa/scrapper.rb, line 68
def extract_data_composicao_capital_social(source)
        simplified = source[0]
        simplified[3..simplified.size]
end
extract_many_td(source) click to toggle source
# File lib/scrapper_bovespa/scrapper.rb, line 93
def extract_many_td(source)
        divided = source[0].split("\r\n")
        simplified = divided[1..divided.size]
        output = []

        simplified.each do |s|
                s.reverse!
                output << s[(s.index(">dt/<") + 4)..(s.index(">\"") - 1)].reverse
        end

        output        
end
extract_many_th(source) click to toggle source
# File lib/scrapper_bovespa/scrapper.rb, line 73
def extract_many_th(source)
        divided = source[0].split("\r\n")
        simplified = divided[1..divided.size]
        output = []

        simplified.each do |s|
                s.reverse!

                if s.index(">ht/<") != nil then
                        begin
                                output << s[(s.index(">ht/<") + 4)..(s.index(">\"") - 1)].reverse
                        rescue => e
                                output << s[(s.index(">ht/<") + 4)..(s.index(">h") - 1)]
                        end
                end
        end

        output        
end
extract_simple_text(source) click to toggle source
# File lib/scrapper_bovespa/scrapper.rb, line 50
def extract_simple_text(source)
        simplified = source[0].reverse

        begin
                simplified = simplified[(simplified.index(">dt/<") + 5)..(simplified.index(">dt<") - 1)]
        rescue => e
                simplified = simplified[(simplified.index(">dt/<") + 5)..(simplified.index("\>\""))]
        end

        simplified.reverse
end
extract_site(source) click to toggle source
# File lib/scrapper_bovespa/scrapper.rb, line 62
def extract_site(source)
        simplified = source[0].reverse
        simplified = simplified[(simplified.index(">a/<") + 4)..(simplified.index(">\"") - 1)]
        simplified.reverse
end
get_company_data(cvm) click to toggle source
# File lib/scrapper_bovespa/scrapper.rb, line 106
def get_company_data(cvm)
        output = {}
        source = Net::HTTP.get(URI("#{@base_query_address}#{cvm}"))
        output[:nome] = extract_simple_text(source.scan(/Nome de Preg.+\r\n.+\<\/td\>/))
        output[:cnpj] = extract_simple_text(source.scan(/CNPJ.+\r\n.+\<\/td\>/))
        output[:site] = extract_site(source.scan(/Site:\<\/td\>\r\n.+\<\/a\>/))

        output[:balanco_patrimonial] = {}
        output[:balanco_patrimonial][:periodo] = extract_many_th(source.scan(/Balan.+\r\n.+\r\n.+\r\n/))
        output[:balanco_patrimonial][:ativo_permanente] = extract_many_td(source.scan(/Ativo Permanente.+\r\n.+\r\n.+\r\n/))
        output[:balanco_patrimonial][:ativo_total] = extract_many_td(source.scan(/Ativo Total.+\r\n.+\r\n.+\r\n/))
        output[:balanco_patrimonial][:patromonio_liquido] = extract_many_td(source.scan(/Patrim.+nio L.+quido.+\r\n.+\r\n.+\r\n/))
        
        output[:demonstracao_do_resultado] = {}
        output[:demonstracao_do_resultado][:periodo] = extract_many_th(source.scan(/Demonstra.+o do Resultado.+\r\n.+\r\n.+\r\n.+\r\n/))
        output[:demonstracao_do_resultado][:receitas_da_intermediacao_financeira] = extract_many_td(source.scan(/Receitas da Intermedia.+ Financeira.+\r\n.+\r\n.+\r\n/))
        output[:demonstracao_do_resultado][:resultado_bruto_de_intermediacao_financeira] = extract_many_td(source.scan(/Resultado Bruto da Intermedia.+ Financeira.+\r\n.+\r\n.+\r\n/))
        output[:demonstracao_do_resultado][:resultado_operacional] = extract_many_td(source.scan(/Resultado Operacional.+\r\n.+\r\n.+\r\n/))
        output[:demonstracao_do_resultado][:lucro_liquido] = extract_many_td(source.scan(/Lucro .+ L.+quido.+\r\n.+\r\n.+\r\n/))

        output[:demonstracao_do_fluxo_de_caixa] = {
        periodo: extract_many_th(source.scan(/Demonstra.+ do Fluxo de Caixa.+\r\n.+\r\n.+\r\n/)),
        atividades_operacionais: extract_many_td(source.scan(/Atividades Operacionais.+\r\n.+\r\n.+\r\n/)),
        atividades_de_investimento: extract_many_td(source.scan(/Atividades de Investimento.+\r\n.+\r\n.+\r\n/)),
        atividades_de_financiamento: extract_many_td(source.scan(/Atividades de Financiamento.+\r\n.+\r\n.+\r\n/)),
        variacao_cambial_sobre_caixa_e_equivalentes: extract_many_td(source.scan(/Varia.+o Cambial sobre Caixa e Equivalentes.+\r\n.+\r\n.+\r\n/)),
        aumento_de_caixa_e_equivalentes: extract_many_td(source.scan(/Aumento .+ de Caixa e Equivalentes.+\r\n.+\r\n.+\r\n/))}

        output[:posicao_acionaria] = {
                headings: extract_many_th(source.scan(/\<th\>Nome.+\r\n.+\r\n.+\r\n.+\r\n/)),
                outros: extract_many_td(source.scan(/\<td\>Outros.+\n.+\n.+\n.+\n/)),
                total: extract_many_td(source.scan(/\<td\>Total.+\n.+\<td.+\n.+\<td.+\n.+\<td.+\n/))}

        output[:composicao_capital_social] = {
                data: extract_data_composicao_capital_social(source.scan(/ - [0-9]+\/[0-9]+\/[0-9]+/)),
                ordinarias: extract_simple_text(source.scan(/Ordin.+rias.+\n.+\n/)),
                preferenciais: extract_simple_text(source.scan(/Preferenciais.+\n.+\n/)),
                total: extract_simple_text(source.scan(/Total.+\n.+\n.+\<\/tr/))}

        output
end
get_cvms() click to toggle source
# File lib/scrapper_bovespa/scrapper.rb, line 148
def get_cvms()
        cvms = []
        guidelines = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 
                                                                'H', 'I', 'J', 'K', 'L', 'M', 'N', 
                                                                'O', 'P', 'Q', 'R', 'S', 'T', 'U', 
                                                                'V', 'W', 'Y', 'X', 'Z', '0', '1', 
                                                                '2', '3', '4', '5', '6', '7', '8', '9']

        guidelines.each do |k|
                source = Net::HTTP.get(URI("#{@code_source}#{k}")).scan(/\>[0-9]+\<\/a\>/)

                source.each do |str|
                        sub = str[1..(str.length - 5)]
                        if sub.length > 1 then
                                cvms << sub.to_i
                        end
                end

                source = nil
        end

        puts cvms.size
        cvms
end