class TomosiaWallhereCrawl::CrawlImage

Public Instance Methods

crawldata(key,destination,max=nil) click to toggle source
# File lib/tomosia_wallhere_crawl.rb, line 23
# Walks the wallhere.com search-result pages for +key+, collects the +src+
# attribute found under every '.item' element, and hands the collected list
# to +download+.
#
# key         - search term placed in the +q+ query parameter.
# destination - passed through unchanged to +download+.
# max         - upper bound on the number of entries collected. When nil, or
#               larger than the total the page reports, the reported total is
#               used instead. Zero or a negative value collects nothing.
#
# Returns whatever +download+ returns.
def crawldata(key, destination, max = nil)
  limit = max
  images = []
  page = 1

  # Keys made only of URL-safe characters (including ones the caller already
  # percent-encoded) pass through untouched; anything else is form-encoded so
  # that multi-word or non-ASCII searches still yield a valid URL.
  query = key.to_s
  query = URI.encode_www_form_component(query) unless query.match?(/\A[\w\-.~%+]*\z/)

  while limit.nil? || images.length < limit
    url = "https://wallhere.com/en/wallpapers?q=#{query}&page=#{page}"
    # Block form closes the response handle as soon as the body has been read.
    parsed_content = Nokogiri::HTML(URI.open(url, &:read))

    total_img = parsed_content.css('div.hub-totalinfo').text.split(' HD Wallpapers')[0].to_i
    if limit.nil? || limit > total_img
      limit = total_img
      puts "This tag has #{total_img} pictures"
    end

    items = parsed_content.css('.item').to_a
    # An empty page means the results are exhausted; without this guard the
    # loop would keep requesting pages forever whenever fewer items are
    # served than the reported total.
    break if items.empty?

    remaining = limit - images.length
    break unless remaining.positive?

    items.first(remaining).each do |item|
      images.push(item.children.children.first.to_h['src'])
      print '.'
    end
    page += 1
  end

  download(images, destination)
end
download(images,destination) click to toggle source
# File lib/tomosia_wallhere_crawl.rb, line 55
        # Downloads every entry of +images+ concurrently (one thread per entry),
        # writes each body into +destination+, then passes one metadata row per
        # successful download to +savedata+.
        #
        # images      - list of image URLs; nil entries and entries that are not
        #               http(s) URLs are skipped.
        # destination - directory the files are written into (not created here).
        #
        # Each row is a Hash with the string keys "name", "url", "extension" and
        # "size". Rows are returned in the same order as +images+.
        #
        # Returns whatever +savedata+ returns.
        def download(images, destination)
          threads = images.compact.map do |img|
            Thread.new(img) do |src|
              # The sources come from a scraped page. Anything that is not an
              # http(s) URL is refused, because URI.open falls back to
              # Kernel#open for non-URL strings (which would treat "|cmd" as a
              # command to run).
              unless src.to_s.match?(%r{\Ahttps?://}i)
                warn "Skipping non-HTTP image source: #{src}"
                next
              end

              attempts = 0
              begin
                URI.open(src) do |image|
                  nameimg = File.basename(src, '.jpg!s')
                  bytes = File.binwrite(File.join(destination, nameimg), image.read)
                  # Strip only the leading dot and the trailing "!s" marker, so
                  # letters inside the extension itself (e.g. "svg") survive.
                  ex = File.extname(src).delete_prefix('.').delete_suffix('!s')
                  { 'name' => nameimg, 'url' => src, 'extension' => ex, 'size' => "#{bytes} bytes" }
                end
              rescue StandardError => e
                # One initial attempt plus up to three retries, as before.
                attempts += 1
                retry if attempts <= 3
                warn "Failed to download #{src}: #{e.message}"
                nil
              end
            end
          end

          # Thread#value joins the thread and returns the block's result, so each
          # row stays local to its own thread and the order is deterministic.
          data = threads.map(&:value).compact
          puts " "
          puts "Download successfully"
          savedata(data, destination)
        end
savedata(data = {}, destination) click to toggle source
# File lib/tomosia_wallhere_crawl.rb, line 8
# Writes one spreadsheet row per entry of +data+ into
# "<destination>/InfoImage.xls".
#
# data        - enumerable of Hashes with the string keys "name", "url",
#               "extension" and "size"; empty Hashes are skipped.
# destination - directory in which the workbook is created.
#
# Columns: 0 = name, 1 = url (written as a link), 2 = extension, 3 = size.
def savedata(data = {}, destination)
  workbook  = WriteExcel.new("#{destination}/InfoImage.xls")
  worksheet = workbook.add_worksheet

  data.each_with_index do |row, stt|
    # Each cell is written exactly once; the row index follows the position
    # in +data+, so a skipped empty entry leaves a blank spreadsheet row.
    next if row.empty?

    worksheet.write_string(stt, 0, row['name'])
    worksheet.write_url(stt, 1, row['url'])
    worksheet.write_string(stt, 2, row['extension'])
    worksheet.write_string(stt, 3, row['size'])
  end

  workbook.close
  puts "Save successfully"
end